summaryrefslogtreecommitdiffstats
path: root/sax
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
commited5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /sax
parentInitial commit. (diff)
downloadlibreoffice-upstream.tar.xz
libreoffice-upstream.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sax')
-rw-r--r--sax/CppunitTest_sax.mk30
-rw-r--r--sax/CppunitTest_sax_attributes.mk29
-rw-r--r--sax/CppunitTest_sax_parser.mk49
-rw-r--r--sax/CppunitTest_sax_xmlimport.mk57
-rw-r--r--sax/IwyuFilter_sax.yaml2
-rw-r--r--sax/Library_expwrap.mk52
-rw-r--r--sax/Library_sax.mk42
-rw-r--r--sax/Makefile7
-rw-r--r--sax/Module_sax.mk24
-rw-r--r--sax/README.md15
-rw-r--r--sax/README.vars5
-rw-r--r--sax/inc/xml2utf.hxx129
-rw-r--r--sax/qa/cppunit/attributes.cxx75
-rw-r--r--sax/qa/cppunit/parser.cxx99
-rw-r--r--sax/qa/cppunit/test_converter.cxx623
-rw-r--r--sax/qa/cppunit/xmlimport.cxx454
-rw-r--r--sax/qa/data/defaultns.xml11
-rw-r--r--sax/qa/data/inlinens.xml12
-rw-r--r--sax/qa/data/manifestwithnsdecl.xml12
-rw-r--r--sax/qa/data/manifestwithoutnsdecl.xml12
-rw-r--r--sax/qa/data/multiplens.xml13
-rw-r--r--sax/qa/data/multiplepfx.xml9
-rw-r--r--sax/qa/data/nestedns.xml32
-rw-r--r--sax/qa/data/nstoattributes.xml17
-rw-r--r--sax/qa/data/simple.xml11
-rw-r--r--sax/qa/data/testthreading.xml5
-rw-r--r--sax/source/expatwrap/expwrap.component38
-rw-r--r--sax/source/expatwrap/sax_expat.cxx961
-rw-r--r--sax/source/expatwrap/saxwriter.cxx1486
-rw-r--r--sax/source/expatwrap/xml2utf.cxx519
-rw-r--r--sax/source/fastparser/fastparser.cxx1693
-rw-r--r--sax/source/fastparser/legacyfastparser.cxx375
-rw-r--r--sax/source/tools/CachedOutputStream.hxx118
-rw-r--r--sax/source/tools/converter.cxx2535
-rw-r--r--sax/source/tools/fastattribs.cxx336
-rw-r--r--sax/source/tools/fastserializer.cxx845
-rw-r--r--sax/source/tools/fastserializer.hxx255
-rw-r--r--sax/source/tools/fshelper.cxx155
-rw-r--r--sax/test/makefile.mk58
-rw-r--r--sax/test/sax/exports.dxp2
-rw-r--r--sax/test/sax/factory.hxx80
-rw-r--r--sax/test/sax/makefile.mk52
-rw-r--r--sax/test/sax/testsax.cxx795
-rw-r--r--sax/test/sax/testwriter.cxx662
-rw-r--r--sax/test/saxdemo.cxx626
-rw-r--r--sax/test/testcomponent.cxx223
46 files changed, 13640 insertions, 0 deletions
diff --git a/sax/CppunitTest_sax.mk b/sax/CppunitTest_sax.mk
new file mode 100644
index 000000000..282ba2414
--- /dev/null
+++ b/sax/CppunitTest_sax.mk
@@ -0,0 +1,30 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sax))
+
+$(eval $(call gb_CppunitTest_use_sdk_api,sax))
+
+$(eval $(call gb_CppunitTest_use_external,sax,boost_headers))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax))
+
+$(eval $(call gb_CppunitTest_use_libraries,sax, \
+ sax \
+ sal \
+ comphelper \
+ cppu \
+))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sax, \
+ sax/qa/cppunit/test_converter \
+))
+
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/CppunitTest_sax_attributes.mk b/sax/CppunitTest_sax_attributes.mk
new file mode 100644
index 000000000..198629814
--- /dev/null
+++ b/sax/CppunitTest_sax_attributes.mk
@@ -0,0 +1,29 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sax_attributes))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax_attributes))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sax_attributes, \
+ sax/qa/cppunit/attributes \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sax_attributes, \
+ cppu \
+ cppuhelper \
+ sal \
+ sax \
+))
+
+$(eval $(call gb_CppunitTest_use_sdk_api,sax_attributes))
+
+$(eval $(call gb_CppunitTest_use_ure,sax_attributes))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/CppunitTest_sax_parser.mk b/sax/CppunitTest_sax_parser.mk
new file mode 100644
index 000000000..59a90fd13
--- /dev/null
+++ b/sax/CppunitTest_sax_parser.mk
@@ -0,0 +1,49 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sax_parser))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax_parser))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sax_parser, \
+ sax/qa/cppunit/parser \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sax_parser, \
+ comphelper \
+ cppu \
+ cppuhelper \
+ unotest \
+ expwrap \
+ sal \
+ sax \
+ test \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sax_parser, \
+ boost_headers \
+))
+
+$(eval $(call gb_CppunitTest_use_sdk_api,sax_parser))
+
+$(eval $(call gb_CppunitTest_use_ure,sax_parser))
+$(eval $(call gb_CppunitTest_use_vcl,sax_parser))
+
+$(eval $(call gb_CppunitTest_use_components,sax_parser,\
+ configmgr/source/configmgr \
+ framework/util/fwk \
+ i18npool/util/i18npool \
+ sfx2/util/sfx \
+ ucb/source/core/ucb1 \
+ ucb/source/ucp/file/ucpfile1 \
+))
+
+$(eval $(call gb_CppunitTest_use_configuration,sax_parser))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/CppunitTest_sax_xmlimport.mk b/sax/CppunitTest_sax_xmlimport.mk
new file mode 100644
index 000000000..2e1073632
--- /dev/null
+++ b/sax/CppunitTest_sax_xmlimport.mk
@@ -0,0 +1,57 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,sax_xmlimport))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax_xmlimport))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sax_xmlimport, \
+sax/qa/cppunit/xmlimport \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sax_xmlimport, \
+ test \
+ cppu \
+ comphelper \
+ sal \
+ cppuhelper \
+ sax \
+ unotest \
+ utl \
+ salhelper \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sax_xmlimport,\
+ offapi \
+ udkapi \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sax_xmlimport, \
+ boost_headers \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sax_xmlimport))
+$(eval $(call gb_CppunitTest_use_vcl,sax_xmlimport))
+
+$(eval $(call gb_CppunitTest_set_include,sax_xmlimport,\
+ -I$(SRCDIR)/sax/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_components,sax_xmlimport,\
+ configmgr/source/configmgr \
+ sax/source/expatwrap/expwrap \
+ ucb/source/core/ucb1 \
+ ucb/source/ucp/file/ucpfile1 \
+ uui/util/uui \
+))
+
+$(eval $(call gb_CppunitTest_use_configuration,sax_xmlimport))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/IwyuFilter_sax.yaml b/sax/IwyuFilter_sax.yaml
new file mode 100644
index 000000000..986dcdc98
--- /dev/null
+++ b/sax/IwyuFilter_sax.yaml
@@ -0,0 +1,2 @@
+---
+assumeFilename: sax/source/fastparser/fastparser.cxx
diff --git a/sax/Library_expwrap.mk b/sax/Library_expwrap.mk
new file mode 100644
index 000000000..3f79ba31b
--- /dev/null
+++ b/sax/Library_expwrap.mk
@@ -0,0 +1,52 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Library_Library,expwrap))
+
+$(eval $(call gb_Library_set_componentfile,expwrap,sax/source/expatwrap/expwrap,services))
+
+$(eval $(call gb_Library_set_include,expwrap,\
+ -I$(SRCDIR)/sax/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_Library_use_common_precompiled_header,expwrap))
+
+$(eval $(call gb_Library_add_defs,expwrap,\
+ -DFASTSAX_DLLIMPLEMENTATION \
+))
+
+$(eval $(call gb_Library_use_sdk_api,expwrap))
+
+$(eval $(call gb_Library_use_externals,expwrap,\
+ boost_headers \
+ libxml2 \
+ expat \
+ zlib \
+))
+
+$(eval $(call gb_Library_use_libraries,expwrap,\
+ comphelper \
+ cppu \
+ cppuhelper \
+ sal \
+ salhelper \
+ sax \
+ tl \
+))
+
+$(eval $(call gb_Library_add_exception_objects,expwrap,\
+ sax/source/expatwrap/sax_expat \
+ sax/source/expatwrap/saxwriter \
+ sax/source/expatwrap/xml2utf \
+ sax/source/fastparser/fastparser \
+ sax/source/fastparser/legacyfastparser \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/Library_sax.mk b/sax/Library_sax.mk
new file mode 100644
index 000000000..6e84a0426
--- /dev/null
+++ b/sax/Library_sax.mk
@@ -0,0 +1,42 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Library_Library,sax))
+
+$(eval $(call gb_Library_set_include,sax,\
+ -I$(SRCDIR)/sax/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_Library_use_external,sax,boost_headers))
+
+$(eval $(call gb_Library_use_common_precompiled_header,sax))
+
+$(eval $(call gb_Library_use_sdk_api,sax))
+
+$(eval $(call gb_Library_use_libraries,sax,\
+ comphelper \
+ cppu \
+ cppuhelper \
+ sal \
+ tl \
+))
+
+$(eval $(call gb_Library_add_defs,sax,\
+ -DSAX_DLLIMPLEMENTATION \
+))
+
+$(eval $(call gb_Library_add_exception_objects,sax,\
+ sax/source/tools/converter \
+ sax/source/tools/fastattribs \
+ sax/source/tools/fastserializer \
+ sax/source/tools/fshelper \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/Makefile b/sax/Makefile
new file mode 100644
index 000000000..ccb1c85a0
--- /dev/null
+++ b/sax/Makefile
@@ -0,0 +1,7 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+
+module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
+
+include $(module_directory)/../solenv/gbuild/partial_build.mk
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/Module_sax.mk b/sax/Module_sax.mk
new file mode 100644
index 000000000..4fca4efdc
--- /dev/null
+++ b/sax/Module_sax.mk
@@ -0,0 +1,24 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Module_Module,sax))
+
+$(eval $(call gb_Module_add_targets,sax,\
+ Library_expwrap \
+ Library_sax \
+))
+
+$(eval $(call gb_Module_add_check_targets,sax,\
+ CppunitTest_sax \
+ CppunitTest_sax_attributes \
+ CppunitTest_sax_parser \
+ CppunitTest_sax_xmlimport \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sax/README.md b/sax/README.md
new file mode 100644
index 000000000..30ae23197
--- /dev/null
+++ b/sax/README.md
@@ -0,0 +1,15 @@
+# UNO Services for SAX
+
+UNO services for SAX parsing and C++ functions for XMLSchema-2 data types.
+
+* `source/expatwrap`:
+ string-based SAX parser UNO service wrapping expat
+* `source/fastparser`:
+ multi-threaded token-based SAX parser UNO service wrapping libxml2
+* `source/tools`:
+ + C++ wrapper for fast SAX parser
+ + C++ XMLSchema-2 data type conversion helpers
+
+Multi-threading in FastParser can be disabled for debugging purposes with:
+
+ SAX_DISABLE_THREADS=1 SAL_LOG="+INFO.sax.fastparser+WARN"
diff --git a/sax/README.vars b/sax/README.vars
new file mode 100644
index 000000000..35d0bccd0
--- /dev/null
+++ b/sax/README.vars
@@ -0,0 +1,5 @@
+Environment variables in sax:
+
+- fastparser:
+
+ - SAX_DISABLE_THREADS - disable multi-threaded SAX parsing (e.g. SAX_DISABLE_THREADS=1)
diff --git a/sax/inc/xml2utf.hxx b/sax/inc/xml2utf.hxx
new file mode 100644
index 000000000..ead6ac114
--- /dev/null
+++ b/sax/inc/xml2utf.hxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_INC_XML2UTF_HXX
+#define INCLUDED_SAX_INC_XML2UTF_HXX
+
+#include <sal/config.h>
+
+#include <memory>
+
+#include <sal/types.h>
+#include <rtl/string.hxx>
+
+#include <com/sun/star/io/XInputStream.hpp>
+
+namespace sax_expatwrap {
+
+class Text2UnicodeConverter
+{
+
+public:
+ Text2UnicodeConverter( const OString & sEncoding );
+ ~Text2UnicodeConverter();
+
+ css::uno::Sequence < sal_Unicode > convert( const css::uno::Sequence<sal_Int8> & );
+ bool canContinue() const { return m_bCanContinue; }
+
+private:
+ void init( rtl_TextEncoding encoding );
+
+ rtl_TextToUnicodeConverter m_convText2Unicode;
+ rtl_TextToUnicodeContext m_contextText2Unicode;
+ bool m_bCanContinue;
+ bool m_bInitialized;
+ css::uno::Sequence<sal_Int8> m_seqSource;
+};
+
+/*----------------------------------------
+*
+* Unicode2TextConverter
+*
+**-----------------------------------------*/
+class Unicode2TextConverter
+{
+public:
+ Unicode2TextConverter( rtl_TextEncoding encoding );
+ ~Unicode2TextConverter();
+
+ css::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
+
+private:
+ rtl_UnicodeToTextConverter m_convUnicode2Text;
+ rtl_UnicodeToTextContext m_contextUnicode2Text;
+ css::uno::Sequence<sal_Unicode> m_seqSource;
+};
+
+
+/*----------------------------------------
+*
+* XMLFile2UTFConverter
+*
+**-----------------------------------------*/
+class XMLFile2UTFConverter
+{
+public:
+ XMLFile2UTFConverter( ):
+ m_bStarted( false )
+ {}
+
+ void setInputStream( css::uno::Reference< css::io::XInputStream > const &r ) { m_in = r; }
+ void setEncoding( const OString &s ) { m_sEncoding = s; }
+
+
+ // @param nMaxToRead The number of chars that should be read. Note that this is not an exact number:
+ // fewer or more bytes than requested may be returned.
+ /// @throws css::io::IOException
+ /// @throws css::io::NotConnectedException
+ /// @throws css::io::BufferSizeExceededException
+ /// @throws css::uno::RuntimeException
+ sal_Int32 readAndConvert( css::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead );
+
+private:
+
+ // Called only on the first Sequence of bytes. Tries to figure out the file format and encoding information.
+ // @return true when encoding information could be retrieved,
+ // false when no encoding information was found in the file
+ bool scanForEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+ // Called only on the first Sequence of bytes. Tries to figure out
+ // whether enough data is available to scan for the encoding.
+ // @return true when the encoding is retrievable,
+ // false when more data is needed
+ static bool isEncodingRecognizable( const css::uno::Sequence< sal_Int8 > & seq );
+
+ // When the encoding attribute is within the text (in the first line), it is removed.
+ static void removeEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+ // Initializes decoding depending on the m_sEncoding setting.
+ void initializeDecoding();
+private:
+ css::uno::Reference< css::io::XInputStream > m_in; // assigned by setInputStream()
+
+ bool m_bStarted; // set to false by the constructor
+ OString m_sEncoding; // assigned by setEncoding()
+
+ std::unique_ptr<Text2UnicodeConverter> m_pText2Unicode;
+ std::unique_ptr<Unicode2TextConverter> m_pUnicode2Text;
+};
+}
+
+#endif // INCLUDED_SAX_INC_XML2UTF_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/qa/cppunit/attributes.cxx b/sax/qa/cppunit/attributes.cxx
new file mode 100644
index 000000000..6c71e9aa1
--- /dev/null
+++ b/sax/qa/cppunit/attributes.cxx
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/types.h>
+#include <com/sun/star/xml/sax/SAXException.hpp>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+
+#include <rtl/ref.hxx>
+#include <sax/fastattribs.hxx>
+
+using namespace css;
+using namespace css::xml;
+
+namespace {
+
+class AttributesTest: public CppUnit::TestFixture
+{
+public:
+ void test();
+
+ CPPUNIT_TEST_SUITE( AttributesTest );
+ CPPUNIT_TEST( test );
+ CPPUNIT_TEST_SUITE_END();
+};
+
+void AttributesTest::test()
+{
+ rtl::Reference<sax_fastparser::FastAttributeList> xAttributeList( new sax_fastparser::FastAttributeList(nullptr) );
+ xAttributeList->add(1, "1");
+ xAttributeList->add(2, OString("2"));
+
+ // getValueToken() and getOptionalValueToken() cannot be exercised here:
+ // the list above is created without an XFastTokenHandler (nullptr is passed).
+ // Uncommenting either call below crashes with a segmentation fault:
+ // xAttributeList->getOptionalValueToken(1, 0);
+ // xAttributeList->getValueToken(2);
+
+ CPPUNIT_ASSERT( xAttributeList->hasAttribute(1) );
+ CPPUNIT_ASSERT( !xAttributeList->hasAttribute(3) );
+
+ CPPUNIT_ASSERT_EQUAL( OUString("2"), xAttributeList->getOptionalValue(2) );
+ CPPUNIT_ASSERT_EQUAL( OUString(), xAttributeList->getOptionalValue(3) ); // token 3 was never added: empty string expected
+
+ CPPUNIT_ASSERT_EQUAL( OUString("1"), xAttributeList->getValue(1) );
+ CPPUNIT_ASSERT_THROW( xAttributeList->getValue(3), xml::sax::SAXException ); // token 3 was never added: exception expected
+
+ xAttributeList->addUnknown("a", "a");
+ xAttributeList->addUnknown("b", "b", "b");
+ xAttributeList->addUnknown("c", "c");
+ CPPUNIT_ASSERT_EQUAL( sal_Int32(3), xAttributeList->getUnknownAttributes().getLength() );
+
+ CPPUNIT_ASSERT_EQUAL( sal_Int32(2), xAttributeList->getFastAttributes().getLength() ); // the two add() calls at the top
+
+ xAttributeList->clear();
+ CPPUNIT_ASSERT( !xAttributeList->hasAttribute(1) );
+ CPPUNIT_ASSERT( !xAttributeList->getFastAttributes().hasElements() );
+ xAttributeList->addUnknown("c", "c");
+ CPPUNIT_ASSERT_EQUAL( sal_Int32(1), xAttributeList->getUnknownAttributes().getLength() ); // expects only the attribute added after clear()
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION( AttributesTest );
+
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/qa/cppunit/parser.cxx b/sax/qa/cppunit/parser.cxx
new file mode 100644
index 000000000..261091fe8
--- /dev/null
+++ b/sax/qa/cppunit/parser.cxx
@@ -0,0 +1,99 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/config.h>
+
+#include <com/sun/star/io/Pipe.hpp>
+#include <com/sun/star/xml/sax/FastToken.hpp>
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+
+#include <sax/fastparser.hxx>
+#include <sax/fastattribs.hxx>
+#include <test/bootstrapfixture.hxx>
+#include <rtl/ref.hxx>
+
+using namespace css;
+using namespace css::xml::sax;
+
+namespace {
+
+class DummyTokenHandler : public sax_fastparser::FastTokenHandlerBase
+{
+public:
+ DummyTokenHandler() {}
+
+ virtual sal_Int32 SAL_CALL getTokenFromUTF8( const uno::Sequence<sal_Int8>& ) override
+ {
+ return FastToken::DONTKNOW;
+ }
+ virtual uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 ) override
+ {
+ CPPUNIT_ASSERT_MESSAGE( "getUTF8Identifier: unexpected call", false );
+ return uno::Sequence<sal_Int8>();
+ }
+ virtual sal_Int32 getTokenDirect( const char * /* pToken */, sal_Int32 /* nLength */ ) const override
+ {
+ return -1;
+ }
+};
+
+class ParserTest: public test::BootstrapFixture
+{
+ InputSource maInput;
+ rtl::Reference< sax_fastparser::FastSaxParser > mxParser;
+ rtl::Reference< DummyTokenHandler > mxTokenHandler;
+
+public:
+ virtual void setUp() override;
+
+ void parse();
+
+ CPPUNIT_TEST_SUITE(ParserTest);
+ CPPUNIT_TEST(parse);
+ CPPUNIT_TEST_SUITE_END();
+
+private:
+ uno::Reference< io::XInputStream > createStream(const OString& sInput);
+};
+
+void ParserTest::setUp()
+{
+ test::BootstrapFixture::setUp();
+ mxTokenHandler.set( new DummyTokenHandler() );
+ mxParser.set( new sax_fastparser::FastSaxParser() );
+ mxParser->setTokenHandler( mxTokenHandler );
+}
+
+uno::Reference< io::XInputStream > ParserTest::createStream(const OString& sInput)
+{
+ uno::Reference< io::XOutputStream > xPipe( io::Pipe::create(m_xContext) );
+ uno::Reference< io::XInputStream > xInStream( xPipe, uno::UNO_QUERY );
+ uno::Sequence< sal_Int8 > aSeq( reinterpret_cast<sal_Int8 const *>(sInput.getStr()), sInput.getLength() );
+ xPipe->writeBytes( aSeq );
+ xPipe->flush();
+ xPipe->closeOutput();
+ return xInStream;
+}
+
+void ParserTest::parse()
+{
+ maInput.aInputStream = createStream("<a>...<b />..</a>");
+ mxParser->parseStream( maInput );
+
+ maInput.aInputStream = createStream("<b></a>");
+ CPPUNIT_ASSERT_THROW( mxParser->parseStream( maInput ), css::xml::sax::SAXParseException );
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(ParserTest);
+
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/qa/cppunit/test_converter.cxx b/sax/qa/cppunit/test_converter.cxx
new file mode 100644
index 000000000..525e110c1
--- /dev/null
+++ b/sax/qa/cppunit/test_converter.cxx
@@ -0,0 +1,623 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <limits>
+
+#include <sal/types.h>
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+
+#include <rtl/ustrbuf.hxx>
+
+#include <com/sun/star/util/DateTime.hpp>
+#include <com/sun/star/util/Duration.hpp>
+#include <com/sun/star/util/MeasureUnit.hpp>
+
+#include <sax/tools/converter.hxx>
+#include <sal/log.hxx>
+
+
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::util;
+using sax::Converter;
+
+
+namespace {
+
+class ConverterTest
+ : public ::CppUnit::TestFixture
+{
+public:
+
+ void testDuration();
+ void testDateTime();
+ void testTime();
+ void testDouble();
+ void testMeasure();
+ void testBool();
+ void testPercent();
+ void testColor();
+ void testNumber();
+
+ CPPUNIT_TEST_SUITE(ConverterTest);
+ CPPUNIT_TEST(testDuration);
+ CPPUNIT_TEST(testDateTime);
+ CPPUNIT_TEST(testTime);
+ CPPUNIT_TEST(testDouble);
+ CPPUNIT_TEST(testMeasure);
+ CPPUNIT_TEST(testBool);
+ CPPUNIT_TEST(testPercent);
+ CPPUNIT_TEST(testColor);
+ CPPUNIT_TEST(testNumber);
+ CPPUNIT_TEST_SUITE_END();
+
+private:
+};
+
+void doTest(util::Duration const & rid, char const*const pis,
+ char const*const i_pos = nullptr)
+{
+ char const*const pos(i_pos ? i_pos : pis);
+ util::Duration od;
+ OUString is(OUString::createFromAscii(pis));
+ SAL_INFO("sax.cppunit","about to convert '" << is << "'");
+ bool bSuccess = Converter::convertDuration(od, is);
+ SAL_INFO("sax.cppunit","" << (od.Negative ? "-" : "+") << " " << od.Years << "Y " << od.Months << "M " << od.Days << "D " << od.Hours << "H " << od.Minutes << "M " << od.Seconds << "S " << od.NanoSeconds << "n");
+ CPPUNIT_ASSERT(bSuccess);
+ CPPUNIT_ASSERT_EQUAL(rid.Years, od.Years);
+ CPPUNIT_ASSERT_EQUAL(rid.Months, od.Months);
+ CPPUNIT_ASSERT_EQUAL(rid.Days, od.Days);
+ CPPUNIT_ASSERT_EQUAL(rid.Hours, od.Hours);
+ CPPUNIT_ASSERT_EQUAL(rid.Minutes, od.Minutes);
+ CPPUNIT_ASSERT_EQUAL(rid.Seconds, od.Seconds);
+ CPPUNIT_ASSERT_EQUAL(rid.NanoSeconds, od.NanoSeconds);
+ CPPUNIT_ASSERT_EQUAL(rid.Negative, od.Negative);
+ OUStringBuffer buf(64);
+ Converter::convertDuration(buf, od);
+ SAL_INFO("sax.cppunit","" << buf.toString());
+ CPPUNIT_ASSERT(buf.makeStringAndClear().equalsAscii(pos));
+}
+
+void doTestDurationF(char const*const pis)
+{
+ util::Duration od;
+ bool bSuccess = Converter::convertDuration(od,
+ OUString::createFromAscii(pis));
+ SAL_INFO("sax.cppunit","" << (od.Negative ? "-" : "+") << " " << od.Years << "Y " << od.Months << "M " << od.Days << "D " << od.Hours << "H " << od.Minutes << "M " << od.Seconds << "S " << od.NanoSeconds << "n");
+ CPPUNIT_ASSERT_MESSAGE(pis, !bSuccess);
+}
+
+void ConverterTest::testDuration()
+{
+ SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST BEGIN");
+ doTest( util::Duration(false, 1, 0, 0, 0, 0, 0, 0), "P1Y" ); // Duration args: negative, years, months, days, hours, minutes, seconds, nanoseconds
+ doTest( util::Duration(false, 0, 42, 0, 0, 0, 0, 0), "P42M" );
+ doTest( util::Duration(false, 0, 0, 111, 0, 0, 0, 0), "P111D" );
+ doTest( util::Duration(false, 0, 0, 0, 52, 0, 0, 0), "PT52H" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 717, 0, 0), "PT717M" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 121, 0), "PT121S" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 190000000), "PT0.19S", "PT0.190000000S"); // third argument: expected output when it differs from the input
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 90000000), "PT0.09S", "PT0.090000000S" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 9000000), "PT0.009S", "PT0.009000000S" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 9), "PT0.000000009S", "PT0.000000009S" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 9, 999999999),
+ "PT9.999999999999999999999999999999S", "PT9.999999999S" );
+ doTest( util::Duration(true , 0, 0, 9999, 0, 0, 0, 0), "-P9999D" );
+ doTest( util::Duration(true , 7, 6, 5, 4, 3, 2, 10000000),
+ "-P7Y6M5DT4H3M2.01000S", "-P7Y6M5DT4H3M2.010000000S" );
+ doTest( util::Duration(false, 0, 6, 0, 0, 3, 0, 0), "P6MT3M" );
+ doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 0), "P0D" );
+ doTestDurationF("1Y1M"); // invalid: missing leading P
+ doTestDurationF("P-1Y1M"); // invalid: - after P
+ doTestDurationF("P1M1Y"); // invalid: Y after M
+ doTestDurationF("PT1Y"); // invalid: Y after T
+ doTestDurationF("P1Y1M1M"); // invalid: M twice, no T
+ doTestDurationF("P1YT1MT1M"); // invalid: T twice
+ doTestDurationF("P1YT"); // invalid: T but no H,M,S
+ doTestDurationF("P99999999999Y"); // invalid: year count too large to parse
+ doTestDurationF("PT.1S"); // invalid: no 0 preceding .
+ doTestDurationF("PT5M.134S"); // invalid: no 0 preceding .
+ doTestDurationF("PT1.S"); // invalid: no digit following .
+ SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST END");
+}
+
+
+bool eqDateTime(const util::DateTime& a, const util::DateTime& b) {
+ return a.Year == b.Year && a.Month == b.Month && a.Day == b.Day
+ && a.Hours == b.Hours && a.Minutes == b.Minutes
+ && a.Seconds == b.Seconds
+ && a.NanoSeconds == b.NanoSeconds
+ && a.IsUTC == b.IsUTC;
+}
+
+void doTest(util::DateTime const & rdt, char const*const pis,
+ char const*const i_pos = nullptr)
+{
+ char const*const pos(i_pos ? i_pos : pis);
+ OUString is(OUString::createFromAscii(pis));
+ util::DateTime odt;
+ SAL_INFO("sax.cppunit","about to convert '" << is << "'");
+ bool bSuccess( Converter::parseDateTime(odt, is) );
+ SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << " M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds << " UTC: " << static_cast<bool>(odt.IsUTC));
+ CPPUNIT_ASSERT(bSuccess);
+ CPPUNIT_ASSERT(eqDateTime(rdt, odt));
+ OUStringBuffer buf(32);
+ Converter::convertDateTime(buf, odt, nullptr, true);
+ SAL_INFO("sax.cppunit","" << buf.toString());
+ CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pos),
+ buf.makeStringAndClear());
+}
+
+void doTestDateTimeF(char const*const pis)
+{
+ util::DateTime odt;
+ bool bSuccess = Converter::parseDateTime(odt, OUString::createFromAscii(pis));
+ SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << "H M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds);
+ CPPUNIT_ASSERT(!bSuccess);
+}
+
+void ConverterTest::testDateTime()
+{
+ SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST BEGIN");
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, false), "0001-01-01T00:00:00" ); // DateTime args: nanoseconds, seconds, minutes, hours, day, month, year, isUTC
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), "0001-01-01T00:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1, false),
+ "-0001-01-01T00:00:00");
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1, true),
+ "-0001-01-01T01:00:00+01:00", "-0001-01-01T00:00:00Z");
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, -324, false),
+ "-0324-01-01T00:00:00" );
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, true),
+ "0001-01-01T00:00:00-00:00", "0001-01-01T00:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, true),
+ "0001-01-01T00:00:00+00:00", "0001-01-01T00:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 12, 2, 1, 1, true),
+ "0001-01-02T00:00:00-12:00", "0001-01-02T12:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 12, 1, 1, 1, true),
+ "0001-01-02T00:00:00+12:00", "0001-01-01T12:00:00Z" );
+ doTest( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, false),
+ "9999-12-31T23:59:59.99", "9999-12-31T23:59:59.990000000" );
+ doTest( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, true),
+ "9999-12-31T23:59:59.99Z", "9999-12-31T23:59:59.990000000Z" );
+ doTest( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, false),
+ "9999-12-31T23:59:59.9999999999999999999999999999999999999",
+ "9999-12-31T23:59:59.999999999" );
+ doTest( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, true),
+ "9999-12-31T23:59:59.9999999999999999999999999999999999999Z",
+ "9999-12-31T23:59:59.999999999Z" );
+ doTest( util::DateTime(0, 0, 0, 0, 29, 2, 2000, true), // leap year
+ "2000-02-29T00:00:00-00:00", "2000-02-29T00:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 0, 29, 2, 1600, true), // leap year
+ "1600-02-29T00:00:00-00:00", "1600-02-29T00:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 24, 1, 1, 333, false)
+ /*(0, 0, 0, 0, 2, 1, 333)*/,
+ "0333-01-01T24:00:00"/*, "0333-01-02T00:00:00"*/ );
+ // Although W3C XMLSchema specifies a minimum of 4 year digits, we are lenient
+ // in what we accept.
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, false),
+ "1-01-01T00:00:00", "0001-01-01T00:00:00" );
+ doTestDateTimeF( "+0001-01-01T00:00:00" ); // invalid: leading '+'
+ doTestDateTimeF( "0001-1-01T00:00:00" ); // invalid: < 2 month digits
+ doTestDateTimeF( "0001-01-1T00:00:00" ); // invalid: < 2 day digits
+ doTestDateTimeF( "0001-01-01T0:00:00" ); // invalid: < 2 hour digits
+ doTestDateTimeF( "0001-01-01T00:0:00" ); // invalid: < 2 minute digits
+ doTestDateTimeF( "0001-01-01T00:00:0" ); // invalid: < 2 second digits
+ doTestDateTimeF( "0001-01-01T00:00:00." ); // invalid: trailing '.' without fractional digits
+ doTestDateTimeF( "0001-01-01T00:00:00+1:00" ); // invalid: < 2 timezone hour digits
+ doTestDateTimeF( "0001-01-01T00:00:00+00:1" ); // invalid: < 2 timezone minute digits
+ doTestDateTimeF( "0001-13-01T00:00:00" ); // invalid: month > 12
+ doTestDateTimeF( "0001-01-32T00:00:00" ); // invalid: day > 31
+ doTestDateTimeF( "0001-01-01T25:00:00" ); // invalid: hours > 24
+ doTestDateTimeF( "0001-01-01T00:60:00" ); // invalid: minutes > 59
+ doTestDateTimeF( "0001-01-01T00:00:60" ); // invalid: seconds > 59
+ doTestDateTimeF( "0001-01-01T24:01:00" ); // invalid: hours = 24, but minutes != 0
+ doTestDateTimeF( "0001-01-01T24:00:01" ); // invalid: hours = 24, but seconds != 0
+ doTestDateTimeF( "0001-01-01T24:00:00.1" ); // invalid: hours = 24, but fractional seconds != 0
+ doTestDateTimeF( "0001-01-02T00:00:00+15:00" ); // invalid: TZ > +14:00
+ doTestDateTimeF( "0001-01-02T00:00:00+14:01" ); // invalid: TZ > +14:00
+ doTestDateTimeF( "0001-01-02T00:00:00-15:00" ); // invalid: TZ < -14:00
+ doTestDateTimeF( "0001-01-02T00:00:00-14:01" ); // invalid: TZ < -14:00
+ doTestDateTimeF( "2100-02-29T00:00:00-00:00" ); // invalid: no leap year
+ doTestDateTimeF( "1900-02-29T00:00:00-00:00" ); // invalid: no leap year
+ doTestDateTimeF( "00:00:00" ); // invalid: no date
+ doTestDateTimeF( "T00:00:00" ); // invalid: no date
+ SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST END");
+}
+
+/** Parse a time or dateTime string, compare the parsed value with rdt, and
+    check that writing it back produces the expected text.
+
+    @param rdt    value the parser is expected to produce
+    @param pis    ASCII input handed to Converter::parseTimeOrDateTime
+    @param i_pos  expected text after converting back; when null the input
+                  text itself is expected
+ */
+void doTestTime(util::DateTime const & rdt, char const*const pis,
+        char const*const i_pos = nullptr)
+{
+    OUString const aInput(OUString::createFromAscii(pis));
+    SAL_INFO("sax.cppunit","about to convert '" << aInput << "'");
+
+    // string -> DateTime
+    util::DateTime odt;
+    bool bSuccess( Converter::parseTimeOrDateTime(odt, aInput) );
+    SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << " M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds << " UTC: " << static_cast<bool>(odt.IsUTC));
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT(eqDateTime(rdt, odt));
+
+    // DateTime -> string
+    OUStringBuffer aBuffer(32);
+    Converter::convertTimeOrDateTime(aBuffer, odt);
+    OUString const aWritten(aBuffer.makeStringAndClear());
+    SAL_INFO("sax.cppunit","" << aWritten);
+
+    char const*const pExpected = (i_pos != nullptr) ? i_pos : pis;
+    CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pExpected), aWritten);
+}
+
+/** Negative test: assert that Converter::parseTimeOrDateTime rejects the
+    given text.
+
+    @param pis  ASCII input that must NOT parse; it is also used as the
+                assertion message so a failure names the offending string
+ */
+void doTestTimeF(char const*const pis)
+{
+    util::DateTime odt;
+    bool bSuccess = Converter::parseTimeOrDateTime(odt, OUString::createFromAscii(pis));
+    // Field labels match the ones logged by doTestTime (the hours label used
+    // to be followed by a stray "H").
+    SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << " M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds);
+    CPPUNIT_ASSERT_MESSAGE(pis, !bSuccess);
+}
+
+// Exercises Converter::parseTimeOrDateTime / convertTimeOrDateTime through
+// doTestTime (inputs that must parse and round-trip) and doTestTimeF (inputs
+// that must be rejected). Covers full dateTime strings, time-only strings and
+// the legacy "zero date" spellings.
+void ConverterTest::testTime() // time or dateTime + horrible backcompat mess
+{
+ // --- dateTime values that must parse ---
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, false),
+ "0001-01-01T00:00:00" );
+ // NOTE(review): identical to the vector directly above
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, false),
+ "0001-01-01T00:00:00" );
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, true),
+ "0001-01-01T00:00:00Z" );
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, -1, false),
+ "-0001-01-01T00:00:00");
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, -1, true),
+ "-0001-01-01T01:00:00+01:00", "-0001-01-01T00:00:00Z");
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, -324, false),
+ "-0324-01-01T00:00:00" );
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, true),
+ "0001-01-01T00:00:00-00:00", "0001-01-01T00:00:00Z" );
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, true),
+ "0001-01-01T00:00:00+00:00", "0001-01-01T00:00:00Z" );
+ doTestTime( util::DateTime(0, 0, 0, 12, 2, 1, 1, true),
+ "0001-01-02T00:00:00-12:00", "0001-01-02T12:00:00Z" );
+ doTestTime( util::DateTime(0, 0, 0, 12, 1, 1, 1, true),
+ "0001-01-02T00:00:00+12:00", "0001-01-01T12:00:00Z" );
+ doTestTime( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, false),
+ "9999-12-31T23:59:59.99", "9999-12-31T23:59:59.990000000" );
+ doTestTime( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, true),
+ "9999-12-31T23:59:59.99Z", "9999-12-31T23:59:59.990000000Z" );
+ doTestTime( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, false),
+ "9999-12-31T23:59:59.9999999999999999999999999999999999999",
+ "9999-12-31T23:59:59.999999999" );
+ doTestTime( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, true),
+ "9999-12-31T23:59:59.9999999999999999999999999999999999999Z",
+ "9999-12-31T23:59:59.999999999Z" );
+ doTestTime( util::DateTime(0, 0, 0, 0, 29, 2, 2000, true), // leap year
+ "2000-02-29T00:00:00-00:00", "2000-02-29T00:00:00Z" );
+ doTestTime( util::DateTime(0, 0, 0, 0, 29, 2, 1600, true), // leap year
+ "1600-02-29T00:00:00-00:00", "1600-02-29T00:00:00Z" );
+ doTestTime( util::DateTime(0, 0, 0, 24, 1, 1, 333, false)
+ /*(0, 0, 0, 0, 2, 1, 333)*/,
+ "0333-01-01T24:00:00"/*, "0333-01-02T00:00:00"*/ );
+ // While W3C XMLSchema specifies a minimum of 4 year digits we are lenient
+ // in what we accept.
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, false),
+ "1-01-01T00:00:00", "0001-01-01T00:00:00" );
+
+ // --- time-only values that must parse (date fields stay 0) ---
+ doTestTime( util::DateTime(0, 0, 0, 0, 0, 0, 0, false), "00:00:00" );
+ doTestTime( util::DateTime(0, 0, 0, 24, 0, 0, 0, false), "24:00:00" );
+ doTestTime( util::DateTime(0, 0, 59, 0, 0, 0, 0, false), "00:59:00" );
+ doTestTime( util::DateTime(0, 1, 2, 4, 0, 0, 0, true), "04:02:01Z" );
+ doTestTime( util::DateTime(0, 1, 2, 4, 0, 0, 0, true),
+ "05:02:01+01:00", "04:02:01Z" );
+ doTestTime( util::DateTime(0, 11, 12, 9, 0, 0, 0, true),
+ "05:12:11-04:00", "09:12:11Z" );
+ doTestTime( util::DateTime(990000000, 59, 59, 23, 0, 0, 0, false),
+ "23:59:59.99", "23:59:59.990000000" );
+ doTestTime( util::DateTime(990000000, 59, 59, 23, 0, 0, 0, true),
+ "23:59:59.99Z", "23:59:59.990000000Z" );
+ // backwards compatible: recognize invalid 0000-00-00 date (LO 3.5)
+ doTestTime( util::DateTime(0, 1, 0, 0, 0, 0, 0, false),
+ "0000-00-00T00:00:01", "00:00:01" );
+ // backwards compatible: recognize invalid 0-00-00 date (OOo)
+ doTestTime( util::DateTime(0, 0, 1, 0, 0, 0, 0, false),
+ "0-00-00T00:01:00", "00:01:00" );
+
+ // --- dateTime values that must be rejected ---
+ doTestTimeF( "+0001-01-01T00:00:00" ); // invalid: ^+
+ doTestTimeF( "0001-1-01T00:00:00" ); // invalid: < 2 M
+ doTestTimeF( "0001-01-1T00:00:00" ); // invalid: < 2 D
+ doTestTimeF( "0001-01-01T0:00:00" ); // invalid: < 2 H
+ doTestTimeF( "0001-01-01T00:0:00" ); // invalid: < 2 M
+ doTestTimeF( "0001-01-01T00:00:0" ); // invalid: < 2 S
+ doTestTimeF( "0001-01-01T00:00:00." ); // invalid: .$
+ doTestTimeF( "0001-01-01T00:00:00+1:00" ); // invalid: < 2 TZ H
+ doTestTimeF( "0001-01-01T00:00:00+00:1" ); // invalid: < 2 TZ M
+ doTestTimeF( "0001-13-01T00:00:00" ); // invalid: M > 12
+ doTestTimeF( "0001-01-32T00:00:00" ); // invalid: D > 31
+ doTestTimeF( "0001-01-01T25:00:00" ); // invalid: H > 24
+ doTestTimeF( "0001-01-01T00:60:00" ); // invalid: M > 59
+ doTestTimeF( "0001-01-01T00:00:60" ); // invalid: S > 59
+ doTestTimeF( "0001-01-01T24:01:00" ); // invalid: H=24, but M != 0
+ doTestTimeF( "0001-01-01T24:00:01" ); // invalid: H=24, but S != 0
+ doTestTimeF( "0001-01-01T24:00:00.1" ); // invalid: H=24, but fraction != 0
+ doTestTimeF( "0001-01-02T00:00:00+15:00" ); // invalid: TZ > +14:00
+ doTestTimeF( "0001-01-02T00:00:00+14:01" ); // invalid: TZ > +14:00
+ doTestTimeF( "0001-01-02T00:00:00-15:00" ); // invalid: TZ < -14:00
+ doTestTimeF( "0001-01-02T00:00:00-14:01" ); // invalid: TZ < -14:00
+ doTestTimeF( "2100-02-29T00:00:00-00:00" ); // invalid: no leap year
+ doTestTimeF( "1900-02-29T00:00:00-00:00" ); // invalid: no leap year
+ // --- time-only values that must be rejected ---
+ doTestTimeF( "T00:00:00" ); // invalid: T
+ doTestTimeF( "0:00:00" ); // invalid: < 2 H
+ doTestTimeF( "00:0:00" ); // invalid: < 2 M
+ doTestTimeF( "00:00:0" ); // invalid: < 2 S
+ doTestTimeF( "00:00:00." ); // invalid: .$
+ doTestTimeF( "00:00:00+1:00" ); // invalid: < 2 TZ H
+ doTestTimeF( "00:00:00+00:1" ); // invalid: < 2 TZ M
+ doTestTimeF( "25:00:00" ); // invalid: H > 24
+ doTestTimeF( "00:60:00" ); // invalid: M > 59
+ doTestTimeF( "00:00:60" ); // invalid: S > 59
+ doTestTimeF( "24:01:00" ); // invalid: H=24, but M != 0
+ doTestTimeF( "24:00:01" ); // invalid: H=24, but S != 0
+ doTestTimeF( "24:00:00.1" ); // invalid: H=24, but fraction != 0
+ doTestTimeF( "00:00:00+15:00" ); // invalid: TZ > +14:00
+ doTestTimeF( "00:00:00+14:01" ); // invalid: TZ > +14:00
+ doTestTimeF( "00:00:00-15:00" ); // invalid: TZ < -14:00
+ doTestTimeF( "00:00:00-14:01" ); // invalid: TZ < -14:00
+}
+
+/** Convert a measure string to a double and back again.
+
+    Calls the parsing overload of Converter::convertDouble with
+    (nSourceUnit, nTargetUnit), compares the result with rd using a tolerance
+    of 1e-8, then calls the OUStringBuffer overload with the two units
+    swapped and expects the original text to come out again.
+
+    @param pis          ASCII input text, also the expected round-trip output
+    @param rd           expected numeric value after parsing
+    @param nSourceUnit  unit argument passed first when parsing
+    @param nTargetUnit  unit argument passed second when parsing
+ */
+void doTestDouble(char const*const pis, double const rd,
+        sal_Int16 const nSourceUnit, sal_Int16 const nTargetUnit)
+{
+    OUString const is(OUString::createFromAscii(pis));
+    // Initialised because the value is logged before success is asserted;
+    // a failing conversion must not make the log read an indeterminate value.
+    double od = 0.0;
+    bool bSuccess(Converter::convertDouble(od, is, nSourceUnit, nTargetUnit));
+    SAL_INFO("sax.cppunit","" << od);
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT_DOUBLES_EQUAL(rd, od, 0.00000001);
+    OUStringBuffer buf;
+    // Reverse direction: note the swapped unit arguments.
+    Converter::convertDouble(buf, od, true, nTargetUnit, nSourceUnit);
+    SAL_INFO("sax.cppunit","" << buf.toString());
+    CPPUNIT_ASSERT_EQUAL(is, buf.makeStringAndClear());
+}
+
+// Round-trips measure strings through doTestDouble for many pairs of
+// MeasureUnit constants. Each call gives: input text, expected parsed value,
+// source unit, target unit.
+void ConverterTest::testDouble()
+{
+ // same unit on both sides
+ doTestDouble("42", 42.0, MeasureUnit::TWIP, MeasureUnit::TWIP);
+ doTestDouble("42", 42.0, MeasureUnit::POINT, MeasureUnit::POINT);
+ doTestDouble("42", 42.0, MeasureUnit::MM_100TH, MeasureUnit::MM_100TH);
+ doTestDouble("42", 42.0, MeasureUnit::MM_10TH, MeasureUnit::MM_10TH);
+ doTestDouble("42", 42.0, MeasureUnit::MM, MeasureUnit::MM); // identity conversions don't seem to add a unit suffix?
+ doTestDouble("42", 42.0, MeasureUnit::CM, MeasureUnit::CM);
+ doTestDouble("42", 42.0, MeasureUnit::INCH, MeasureUnit::INCH);
+ // differing source and target units
+ doTestDouble("2pt", 40.0, MeasureUnit::POINT, MeasureUnit::TWIP);
+ doTestDouble("20pc", 1, MeasureUnit::TWIP, MeasureUnit::POINT);
+ doTestDouble("4", 2.26771653543307, MeasureUnit::MM_100TH, MeasureUnit::TWIP);
+ doTestDouble("4", 22.6771653543307, MeasureUnit::MM_10TH, MeasureUnit::TWIP);
+ doTestDouble("4mm", 226.771653543307, MeasureUnit::MM, MeasureUnit::TWIP);
+ doTestDouble("4cm", 2267.71653543307, MeasureUnit::CM, MeasureUnit::TWIP);
+ doTestDouble("4in", 5760.0, MeasureUnit::INCH, MeasureUnit::TWIP);
+ doTestDouble("1440pc", 1.0, MeasureUnit::TWIP, MeasureUnit::INCH);
+ doTestDouble("567pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::CM);
+ doTestDouble("56.7pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::MM);
+ doTestDouble("5.67pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::MM_10TH);
+ doTestDouble("0.567pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::MM_100TH);
+ doTestDouble("42pt", 1.4816666666666, MeasureUnit::POINT, MeasureUnit::CM);
+ doTestDouble("42pt", 14.816666666666, MeasureUnit::POINT, MeasureUnit::MM);
+ doTestDouble("42pt", 148.16666666666, MeasureUnit::POINT, MeasureUnit::MM_10TH);
+ doTestDouble("42pt", 1481.6666666666, MeasureUnit::POINT, MeasureUnit::MM_100TH);
+ doTestDouble("72pt", 1.0, MeasureUnit::POINT, MeasureUnit::INCH);
+ doTestDouble("3.5in", 8.89, MeasureUnit::INCH, MeasureUnit::CM);
+ doTestDouble("3.5in", 88.9, MeasureUnit::INCH, MeasureUnit::MM);
+ doTestDouble("3.5in", 889.0, MeasureUnit::INCH, MeasureUnit::MM_10TH);
+ doTestDouble("3.5in", 8890.0, MeasureUnit::INCH, MeasureUnit::MM_100TH);
+ doTestDouble("2in", 144, MeasureUnit::INCH, MeasureUnit::POINT);
+ doTestDouble("5.08cm", 2.0, MeasureUnit::CM, MeasureUnit::INCH);
+ doTestDouble("3.5cm", 3500.0, MeasureUnit::CM, MeasureUnit::MM_100TH);
+ doTestDouble("3.5cm", 350.0, MeasureUnit::CM, MeasureUnit::MM_10TH);
+ doTestDouble("3.5cm", 35.0, MeasureUnit::CM, MeasureUnit::MM);
+ doTestDouble("10cm", 283.464566929134, MeasureUnit::CM, MeasureUnit::POINT);
+ doTestDouble("0.5cm", 283.464566929134, MeasureUnit::CM, MeasureUnit::TWIP);
+ doTestDouble("10mm", 28.3464566929134, MeasureUnit::MM, MeasureUnit::POINT);
+ doTestDouble("0.5mm", 28.3464566929134, MeasureUnit::MM, MeasureUnit::TWIP);
+ doTestDouble("10", 2.83464566929134, MeasureUnit::MM_10TH, MeasureUnit::POINT);
+ doTestDouble("0.5", 2.83464566929134, MeasureUnit::MM_10TH, MeasureUnit::TWIP);
+ doTestDouble("10", 0.283464566929134, MeasureUnit::MM_100TH, MeasureUnit::POINT);
+ doTestDouble("0.5", 0.283464566929134, MeasureUnit::MM_100TH, MeasureUnit::TWIP);
+ doTestDouble("10mm", 1.0, MeasureUnit::MM, MeasureUnit::CM);
+ doTestDouble("10mm", 100.0, MeasureUnit::MM, MeasureUnit::MM_10TH);
+ doTestDouble("20mm", 2000.0, MeasureUnit::MM, MeasureUnit::MM_100TH);
+ doTestDouble("300", 30.0, MeasureUnit::MM_10TH, MeasureUnit::MM);
+ doTestDouble("400", 4.0, MeasureUnit::MM_100TH, MeasureUnit::MM);
+ doTestDouble("600", 6000.0, MeasureUnit::MM_10TH, MeasureUnit::MM_100TH);
+ doTestDouble("700", 70.0, MeasureUnit::MM_100TH, MeasureUnit::MM_10TH);
+}
+
+/** Parse a measure string with Converter::convertMeasure and check the
+    resulting integer.
+
+    @param rValue       expected parsed value
+    @param pis          ASCII input text
+    @param nTargetUnit  unit argument forwarded to convertMeasure
+    @param nMin         lower bound forwarded to convertMeasure
+    @param nMax         upper bound forwarded to convertMeasure
+ */
+void doTestStringToMeasure(sal_Int32 rValue, char const*const pis, sal_Int16 nTargetUnit, sal_Int32 nMin, sal_Int32 nMax)
+{
+    OUString const is(OUString::createFromAscii(pis));
+    // Initialised because the value is logged before success is asserted;
+    // a failing conversion must not make the log read an indeterminate value.
+    sal_Int32 nVal = 0;
+    bool bSuccess(Converter::convertMeasure(nVal, is, nTargetUnit, nMin, nMax));
+    SAL_INFO("sax.cppunit","" << nVal);
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT_EQUAL(rValue, nVal);
+}
+
+/** Write an integer measure with Converter::convertMeasure and compare the
+    produced text with the expected string.
+
+    @param pis          expected ASCII output
+    @param nMeasure     value to write
+    @param nSourceUnit  unit argument passed third to convertMeasure
+    @param nTargetUnit  unit argument passed fourth to convertMeasure
+ */
+void doTestMeasureToString(char const*const pis, sal_Int32 nMeasure, sal_Int16 const nSourceUnit, sal_Int16 const nTargetUnit)
+{
+    OUStringBuffer aBuffer;
+    Converter::convertMeasure(aBuffer, nMeasure, nSourceUnit, nTargetUnit);
+    OUString const aWritten(aBuffer.makeStringAndClear());
+    SAL_INFO("sax.cppunit","" << aWritten);
+    CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pis), aWritten);
+}
+
+// Tests Converter::convertMeasure in both directions: string -> sal_Int32 via
+// doTestStringToMeasure (expected value, text, target unit, min, max) and
+// sal_Int32 -> string via doTestMeasureToString (expected text, value,
+// source unit, target unit).
+void ConverterTest::testMeasure()
+{
+ //check all the measure units
+ doTestStringToMeasure(1000, "10mm", MeasureUnit::MM_100TH, -1, 4321);
+ doTestStringToMeasure(200, "20mm", MeasureUnit::MM_10TH, 12, 4567);
+ doTestStringToMeasure(300, "300", MeasureUnit::MM, 31, 555);
+ doTestStringToMeasure(400, "400", MeasureUnit::CM, 10, 4321);
+ doTestStringToMeasure(120, "120", MeasureUnit::INCH_1000TH, 10, 4321);
+ doTestStringToMeasure(111, "111", MeasureUnit::INCH_100TH, 10, 4321);
+ doTestStringToMeasure(22, "22", MeasureUnit::INCH_10TH, 10, 4321);
+ doTestStringToMeasure(27, "27", MeasureUnit::INCH, 10, 4321);
+ doTestStringToMeasure(52, "52", MeasureUnit::POINT, 10, 4321);
+ doTestStringToMeasure(120, "120", MeasureUnit::TWIP, 10, 4321);
+ doTestStringToMeasure(666, "666", MeasureUnit::M, 10, 4321);
+ doTestStringToMeasure(42, "42", MeasureUnit::KM, 10, 4321);
+ doTestStringToMeasure(30, "30", MeasureUnit::PICA, 10, 4321);
+ doTestStringToMeasure(20, "20", MeasureUnit::FOOT, 10, 4321);
+ doTestStringToMeasure(40, "40", MeasureUnit::MILE, 10, 4321);
+ doTestStringToMeasure(40, "40%", MeasureUnit::PERCENT, 10, 4321);
+ doTestStringToMeasure(800, "800", MeasureUnit::PIXEL, 10, 4321);
+ doTestStringToMeasure(600, "600px", MeasureUnit::PIXEL, 10, 4321);
+ doTestStringToMeasure(777, "777", MeasureUnit::APPFONT, 10, 4321);
+ doTestStringToMeasure(80000, "80000", MeasureUnit::SYSFONT, 10, 432100);
+ //strange values (negative, too large etc.); the expected results equal the
+ //min/max argument whenever the input lies outside [min, max]
+ doTestStringToMeasure(555, "666", MeasureUnit::MM, -1000, 555);
+ doTestStringToMeasure(-1000, "-1001", MeasureUnit::MM, -1000, 555);
+ doTestStringToMeasure(0, "-0", MeasureUnit::MM, -1, 0);
+ doTestStringToMeasure(::std::numeric_limits<sal_Int32>::max(), "1234567890mm", MeasureUnit::MM_10TH, 12, ::std::numeric_limits<sal_Int32>::max());
+ doTestStringToMeasure(-300, "-300", MeasureUnit::MM, -1000, 555);
+ doTestStringToMeasure(::std::numeric_limits<sal_Int32>::min(), "-999999999999999px", MeasureUnit::PIXEL, ::std::numeric_limits<sal_Int32>::min(), 555); //really crazy numbers...
+
+ // integer -> string direction
+ doTestMeasureToString("6mm", 600, MeasureUnit::MM_100TH, MeasureUnit::MM);
+ doTestMeasureToString("0.005cm", 000000005, MeasureUnit::MM_100TH, MeasureUnit::CM); // leading zeros make this an octal literal; its value is still 5
+ doTestMeasureToString("3mm", 30, MeasureUnit::MM_10TH, MeasureUnit::MM);
+ doTestMeasureToString("6.66cm", 666, MeasureUnit::MM_10TH, MeasureUnit::CM);
+ doTestMeasureToString("-157.3pt", -555, MeasureUnit::MM_10TH, MeasureUnit::POINT);
+ doTestMeasureToString("174976.378in", 44444000, MeasureUnit::MM_10TH, MeasureUnit::INCH); //let's check accuracy
+ doTestMeasureToString("40%", 40, MeasureUnit::PERCENT, MeasureUnit::PERCENT);
+ doTestMeasureToString("70.56mm", 4000, MeasureUnit::TWIP, MeasureUnit::MM);
+ doTestMeasureToString("979.928cm", 555550, MeasureUnit::TWIP, MeasureUnit::CM);
+ doTestMeasureToString("111.1pt", 2222, MeasureUnit::TWIP, MeasureUnit::POINT);
+ doTestMeasureToString("385.7986in", 555550, MeasureUnit::TWIP, MeasureUnit::INCH);
+ doTestMeasureToString("-2147483.648cm", std::numeric_limits<sal_Int32>::min(), MeasureUnit::MM_100TH, MeasureUnit::CM);
+}
+
+/** Parse a boolean string with Converter::convertBool and check the result.
+
+    @param bBool  expected parsed value
+    @param pis    ASCII input text
+ */
+void doTestStringToBool(bool bBool, char const*const pis)
+{
+    OUString const is(OUString::createFromAscii(pis));
+    // Initialised because the value is logged before success is asserted;
+    // reading an indeterminate bool would be undefined behaviour.
+    bool bTemp = false;
+    bool bSuccess(Converter::convertBool(bTemp, is));
+    SAL_INFO("sax.cppunit","" << bTemp);
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT_EQUAL(bBool, bTemp);
+}
+
+/** Write a bool with Converter::convertBool and compare the produced text.
+
+    @param pis     expected ASCII output
+    @param bValue  value to write
+ */
+void doTestBoolToString(char const*const pis, bool bValue )
+{
+    OUStringBuffer aBuffer;
+    Converter::convertBool(aBuffer, bValue);
+    OUString const aWritten(aBuffer.makeStringAndClear());
+    SAL_INFO("sax.cppunit","" << aWritten);
+    CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pis), aWritten);
+}
+
+/// Checks both directions of the bool conversion for "true" and "false".
+void ConverterTest::testBool()
+{
+    struct BoolCase
+    {
+        bool bValue;
+        char const* pText;
+    };
+    static BoolCase const aCases[] = { { true, "true" }, { false, "false" } };
+
+    // string -> bool
+    for (BoolCase const& rCase : aCases)
+        doTestStringToBool(rCase.bValue, rCase.pText);
+    // bool -> string
+    for (BoolCase const& rCase : aCases)
+        doTestBoolToString(rCase.pText, rCase.bValue);
+}
+
+/** Parse a percentage string with Converter::convertPercent and check the
+    resulting integer.
+
+    @param nValue  expected parsed value
+    @param pis     ASCII input text
+ */
+void doTestStringToPercent(sal_Int32 nValue, char const*const pis)
+{
+    OUString const is(OUString::createFromAscii(pis));
+    // Initialised because the value is logged before success is asserted;
+    // a failing conversion must not make the log read an indeterminate value.
+    sal_Int32 nTemp = 0;
+    bool bSuccess(Converter::convertPercent(nTemp, is));
+    SAL_INFO("sax.cppunit","" << nTemp);
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT_EQUAL(nValue, nTemp);
+}
+
+/** Write a percentage with Converter::convertPercent and compare the
+    produced text.
+
+    @param pis     expected ASCII output
+    @param nValue  value to write
+ */
+void doTestPercentToString(char const*const pis, sal_Int32 nValue)
+{
+    OUStringBuffer aBuffer;
+    Converter::convertPercent(aBuffer, nValue);
+    OUString const aWritten(aBuffer.makeStringAndClear());
+    SAL_INFO("sax.cppunit","" << aWritten);
+    CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pis), aWritten);
+}
+
+/// Checks percentage parsing and writing with a handful of fixed vectors.
+void ConverterTest::testPercent()
+{
+    struct PercentCase
+    {
+        sal_Int32 nValue;
+        char const* pText;
+    };
+
+    // string -> integer
+    static PercentCase const aParseCases[] = {
+        { 40, "40%" },
+        { 30, "30" },
+        { 120, "120%" },
+        { -40, "-40%" },
+        { 0, "0%" },
+    };
+    for (PercentCase const& rCase : aParseCases)
+        doTestStringToPercent(rCase.nValue, rCase.pText);
+
+    // integer -> string
+    static PercentCase const aWriteCases[] = {
+        { 12, "12%" },
+        { -123, "-123%" },
+        { 0, "0%" },
+        { 1, "1%" },
+    };
+    for (PercentCase const& rCase : aWriteCases)
+        doTestPercentToString(rCase.pText, rCase.nValue);
+}
+
+/** Parse a colour string with Converter::convertColor and check the
+    resulting integer.
+
+    @param nValue  expected parsed value
+    @param pis     ASCII input text
+ */
+void doTestStringToColor(sal_Int32 nValue, char const*const pis)
+{
+    OUString const is(OUString::createFromAscii(pis));
+    // Initialised because the value is logged before success is asserted;
+    // a failing conversion must not make the log read an indeterminate value.
+    sal_Int32 nTemp = 0;
+    bool bSuccess(Converter::convertColor(nTemp, is));
+    SAL_INFO("sax.cppunit","" << nTemp);
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT_EQUAL(nValue, nTemp);
+}
+
+/** Write a colour with Converter::convertColor and compare the produced
+    text.
+
+    @param pis     expected ASCII output
+    @param nValue  value to write
+ */
+void doTestColorToString(char const*const pis, sal_Int32 nValue)
+{
+    OUStringBuffer aBuffer;
+    Converter::convertColor(aBuffer, nValue);
+    OUString const aWritten(aBuffer.makeStringAndClear());
+    SAL_INFO("sax.cppunit","" << aWritten);
+    CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pis), aWritten);
+}
+
+/// Checks colour parsing and writing with a handful of fixed vectors.
+void ConverterTest::testColor()
+{
+    struct ColorCase
+    {
+        sal_Int32 nValue;
+        char const* pText;
+    };
+
+    // string -> integer
+    static ColorCase const aParseCases[] = {
+        { 11259375, "#abcdef" },
+        { 160, "#0000a0" },
+        { 40960, "#00a000" },
+        { 0, "#000000" },
+    };
+    for (ColorCase const& rCase : aParseCases)
+        doTestStringToColor(rCase.nValue, rCase.pText);
+
+    // integer -> string
+    static ColorCase const aWriteCases[] = {
+        { 1557, "#000615" },
+        { 123456789, "#5bcd15" },
+        { -1337, "#fffac7" },
+        { 0, "#000000" },
+    };
+    for (ColorCase const& rCase : aWriteCases)
+        doTestColorToString(rCase.pText, rCase.nValue);
+}
+
+/** Parse an integer string with Converter::convertNumber and check the
+    result.
+
+    @param nValue  expected parsed value
+    @param pis     ASCII input text
+    @param nMin    lower bound forwarded to convertNumber
+    @param nMax    upper bound forwarded to convertNumber
+ */
+void doTestStringToNumber(sal_Int32 nValue, char const*const pis, sal_Int32 nMin, sal_Int32 nMax)
+{
+    OUString const is(OUString::createFromAscii(pis));
+    // Initialised because the value is logged before success is asserted;
+    // a failing conversion must not make the log read an indeterminate value.
+    sal_Int32 nTemp = 0;
+    bool bSuccess(Converter::convertNumber(nTemp, is, nMin, nMax));
+    SAL_INFO("sax.cppunit","" << nTemp);
+    CPPUNIT_ASSERT(bSuccess);
+    CPPUNIT_ASSERT_EQUAL(nValue, nTemp);
+}
+
+/// Checks integer parsing, including inputs that lie outside [min, max].
+void ConverterTest::testNumber()
+{
+    struct NumberCase
+    {
+        sal_Int32 nExpected;
+        char const* pText;
+        sal_Int32 nMin;
+        sal_Int32 nMax;
+    };
+    static NumberCase const aCases[] = {
+        { 30, "30", 1, 40 },
+        { 1, "-5", 1, 300 },
+        { -30, "7", -100, -30 },
+        { 0, "-0", 0, 1 },
+        { 0, "666", 0, 0 },
+    };
+    for (NumberCase const& rCase : aCases)
+        doTestStringToNumber(rCase.nExpected, rCase.pText, rCase.nMin, rCase.nMax);
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(ConverterTest);
+
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/qa/cppunit/xmlimport.cxx b/sax/qa/cppunit/xmlimport.cxx
new file mode 100644
index 000000000..1eb872d50
--- /dev/null
+++ b/sax/qa/cppunit/xmlimport.cxx
@@ -0,0 +1,454 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+#include <sal/types.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+#include <test/bootstrapfixture.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <com/sun/star/beans/Pair.hpp>
+#include <com/sun/star/xml/sax/SAXException.hpp>
+#include <com/sun/star/xml/sax/XDocumentHandler.hpp>
+#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
+#include <comphelper/processfactory.hxx>
+#include <com/sun/star/xml/sax/Parser.hpp>
+#include <com/sun/star/xml/sax/XParser.hpp>
+#include <com/sun/star/xml/sax/XLocator.hpp>
+#include <com/sun/star/xml/sax/FastToken.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <osl/file.hxx>
+#include <unotools/ucbstreamhelper.hxx>
+#include <unotools/streamwrap.hxx>
+#include <sax/fastattribs.hxx>
+#include <stack>
+#include <string_view>
+#include <deque>
+#include <rtl/ref.hxx>
+
+
+namespace {
+
+using namespace css;
+using namespace uno;
+using namespace io;
+using namespace xml::sax;
+using namespace ::osl;
+using namespace sax_fastparser;
+
+/** Open a file and expose it as a UNO input stream.
+
+    The system path is converted to a file URL, opened read-only through
+    utl::UcbStreamHelper and wrapped in a utl::OStreamWrapper. The current
+    test fails if the stream cannot be created.
+
+    @param filePath  system path of the file to open
+    @return the XInputStream interface queried from the wrapper
+ */
+Reference< XInputStream > createStreamFromFile (
+    const OUString & filePath)
+{
+    OUString aFileURL;
+    FileBase::getFileURLFromSystemPath(filePath, aFileURL);
+    std::unique_ptr<SvStream> pStream = utl::UcbStreamHelper::CreateStream(aFileURL, StreamMode::READ);
+    // Assert the condition itself so that a failure reports something more
+    // useful than the literal "false".
+    CPPUNIT_ASSERT_MESSAGE("could not open test input file", pStream != nullptr);
+    Reference< XStream > xStream(new utl::OStreamWrapper(std::move(pStream)));
+    Reference< XInputStream > xInputStream;
+    xInputStream.set(xStream, UNO_QUERY);
+    return xInputStream;
+}
+
+// XDocumentHandler implementation that flattens every SAX event it receives
+// into one string (see getString()), replacing namespace prefixes with the
+// namespace values recorded from xmlns attributes. Two parsers that deliver
+// equivalent events therefore produce equal strings.
+class TestDocumentHandler : public cppu::WeakImplHelper< XDocumentHandler >
+{
+private:
+ // accumulated textual form of all events since the last startDocument()
+ OUString m_aStr;
+ // (prefix, namespace value) pairs currently in scope; searched back to front
+ std::deque< std::pair<OUString,OUString> > m_aNamespaceStack;
+ // per open element: how many entries it pushed onto m_aNamespaceStack
+ std::stack<sal_uInt16> m_aCountStack;
+
+ OUString canonicalform(const OUString &sName, const OUString &sValue, bool isElement);
+ OUString getNamespace(std::u16string_view sName);
+
+public:
+ TestDocumentHandler() {}
+ // text collected so far; reset by startDocument()
+ const OUString & getString() const { return m_aStr; }
+
+ // XDocumentHandler
+ virtual void SAL_CALL startDocument() override;
+ virtual void SAL_CALL endDocument() override;
+ virtual void SAL_CALL startElement( const OUString& aName, const Reference< XAttributeList >& xAttribs ) override;
+ virtual void SAL_CALL endElement( const OUString& aName ) override;
+ virtual void SAL_CALL characters( const OUString& aChars ) override;
+ virtual void SAL_CALL ignorableWhitespace( const OUString& aWhitespaces ) override;
+ virtual void SAL_CALL processingInstruction( const OUString& aTarget, const OUString& aData ) override;
+ virtual void SAL_CALL setDocumentLocator( const Reference< XLocator >& xLocator ) override;
+};
+
+/** Map an element or attribute name to its prefix-independent form.
+
+    Attributes whose name starts with "xmlns" are treated as namespace
+    declarations: the declaration is pushed onto m_aNamespaceStack (under
+    the key "default" when the name has no colon), the counter of the
+    current element is incremented, and an empty string is returned.
+
+    For every other name the prefix is replaced by the namespace value
+    recorded for it. Names without a prefix get the default namespace
+    prepended only if they are element names and a non-empty default
+    namespace is in scope.
+
+    @param sName      qualified name as delivered by the parser
+    @param sValue     attribute value (used only for xmlns attributes)
+    @param isElement  true for element names, false for attribute names
+    @return the canonical name, or an empty string for xmlns attributes
+ */
+OUString TestDocumentHandler::canonicalform(const OUString &sName, const OUString &sValue, bool isElement)
+{
+    // indexOf() returns sal_Int32; keep the full width instead of narrowing.
+    const sal_Int32 nIndex = sName.indexOf(":");
+
+    if ( !isElement && sName.match( "xmlns" ) )
+    {
+        m_aCountStack.top() += 1;
+        if ( nIndex < 0 )
+            m_aNamespaceStack.emplace_back( OUString( "default" ), sValue );
+        else
+            m_aNamespaceStack.emplace_back( sName.copy( nIndex + 1 ), sValue );
+        return OUString();
+    }
+
+    if ( nIndex >= 0 )
+    {
+        // Prefixed name: swap the prefix for its namespace value; the colon
+        // and the local part are kept as they are.
+        OUString sNamespace = getNamespace( sName.subView( 0, nIndex ) );
+        return sNamespace + sName.subView(nIndex);
+    }
+
+    OUString sDefaultns = getNamespace( u"default" );
+    if ( !isElement || sDefaultns.isEmpty() )
+        return sName;
+    return sDefaultns + ":" + sName;
+}
+
+/** Look up the namespace value most recently recorded for a prefix.
+
+    The stack is searched from the newest entry to the oldest, so inner
+    declarations win over outer ones.
+
+    @param sName  prefix to look up ("default" for the default namespace)
+    @return the recorded value, or an empty string if the prefix is unknown
+ */
+OUString TestDocumentHandler::getNamespace(std::u16string_view sName)
+{
+    // Reverse iterators avoid squeezing size() into a 16-bit index and
+    // avoid copying each pair while searching.
+    for (auto it = m_aNamespaceStack.rbegin(); it != m_aNamespaceStack.rend(); ++it)
+    {
+        if (it->first == sName)
+            return it->second;
+    }
+    return OUString();
+}
+
+/// Resets all collected state so that the handler can be reused for a new parse.
+void SAL_CALL TestDocumentHandler::startDocument()
+{
+    m_aStr.clear();
+
+    // One counter for the document level, starting at zero.
+    m_aCountStack = std::stack<sal_uInt16>();
+    m_aCountStack.push(0);
+
+    // The default namespace starts out empty.
+    m_aNamespaceStack.clear();
+    m_aNamespaceStack.emplace_back( OUString( "default" ), OUString() );
+}
+
+
+// Intentionally empty: nothing is appended to the collected string at the
+// end of the document.
+void SAL_CALL TestDocumentHandler::endDocument()
+{
+}
+
+/** Appends the canonical element name followed by all non-xmlns attributes
+    (canonical name immediately followed by the value) to the collected text.
+
+    The attributes are processed before the element name so that namespace
+    declarations made on this element are already recorded when the element
+    name itself is resolved.
+ */
+void SAL_CALL TestDocumentHandler::startElement( const OUString& aName, const Reference< XAttributeList >& xAttribs )
+{
+    // New scope: no namespace declarations counted for this element yet.
+    m_aCountStack.push(0);
+
+    OUString sCollected;
+    const sal_uInt16 nAttrCount = xAttribs->getLength();
+    for (sal_uInt16 nAttr = 0; nAttr < nAttrCount; ++nAttr)
+    {
+        const OUString sValue = xAttribs->getValueByIndex(nAttr);
+        const OUString sCanonical = canonicalform(xAttribs->getNameByIndex(nAttr), sValue, false);
+        if (sCanonical.isEmpty())
+            continue; // xmlns declaration: recorded, not emitted
+        sCollected += sCanonical + sValue;
+    }
+
+    m_aStr += canonicalform(aName, "", true) + sCollected;
+}
+
+
+/** Appends the canonical element name and then drops the namespace
+    declarations that were recorded while this element was open.
+ */
+void SAL_CALL TestDocumentHandler::endElement( const OUString& aName )
+{
+    // Resolve the name while the element's own declarations are still in scope.
+    m_aStr += canonicalform(aName, "", true);
+
+    for (sal_uInt16 nRemaining = m_aCountStack.top(); nRemaining > 0; --nRemaining)
+        m_aNamespaceStack.pop_back();
+    m_aCountStack.pop();
+}
+
+
+// Appends character data unchanged to the collected text.
+void SAL_CALL TestDocumentHandler::characters( const OUString& aChars )
+{
+ m_aStr += aChars;
+}
+
+
+// Appends ignorable whitespace unchanged to the collected text, i.e. it is
+// treated exactly like character data.
+void SAL_CALL TestDocumentHandler::ignorableWhitespace( const OUString& aWhitespaces )
+{
+ m_aStr += aWhitespaces;
+}
+
+
+// Appends the target immediately followed by the data (no separator) to the
+// collected text.
+void SAL_CALL TestDocumentHandler::processingInstruction( const OUString& aTarget, const OUString& aData )
+{
+ m_aStr += aTarget + aData;
+}
+
+
+// Intentionally empty: the locator is not used by this handler.
+void SAL_CALL TestDocumentHandler::setDocumentLocator( const Reference< XLocator >& /*xLocator*/ )
+{
+}
+
+// XDocumentHandler implementation that only inspects element names: every
+// callback except startElement() is an empty stub.
+class NSDocumentHandler : public cppu::WeakImplHelper< XDocumentHandler >
+{
+public:
+ NSDocumentHandler() {}
+
+ // XDocumentHandler
+ virtual void SAL_CALL startDocument() override {}
+ virtual void SAL_CALL endDocument() override {}
+ // the only callback with behaviour; defined out of line
+ virtual void SAL_CALL startElement( const OUString& aName, const Reference< XAttributeList >& xAttribs ) override;
+ virtual void SAL_CALL endElement( const OUString& /* aName */ ) override {}
+ virtual void SAL_CALL characters( const OUString& /* aChars */ ) override {}
+ virtual void SAL_CALL ignorableWhitespace( const OUString& /* aWhitespaces */ ) override {}
+ virtual void SAL_CALL processingInstruction( const OUString& /* aTarget */, const OUString& /* aData */ ) override {}
+ virtual void SAL_CALL setDocumentLocator( const Reference< XLocator >& /* xLocator */ ) override {}
+};
+
+/** Returns the namespace URI that belongs to one of the known prefixes.
+
+    "text" and "note" both map to the same text namespace URI; any other
+    prefix than "office", "text" or "note" yields an empty string.
+ */
+OUString getNamespaceValue( std::u16string_view rNamespacePrefix )
+{
+    if (rNamespacePrefix == u"office")
+        return OUString("urn:oasis:names:tc:opendocument:xmlns:office:1.0");
+
+    if (rNamespacePrefix == u"text" || rNamespacePrefix == u"note")
+        return OUString("urn:oasis:names:tc:opendocument:xmlns:text:1.0");
+
+    return OUString();
+}
+
+/** Replaces the prefix of a qualified name by its namespace URI.
+
+    Names that have no colon, start with a colon, or end with the colon are
+    returned unchanged.
+
+    @param aName  qualified name of the form "prefix:local"
+    @return "<namespace URI>:local", or aName itself if it cannot be split
+ */
+OUString resolveNamespace( const OUString& aName )
+{
+    const sal_Int32 nColon = aName.indexOf( ':' );
+    const bool bHasPrefix = nColon > 0;
+    const bool bHasLocalPart = aName.getLength() > nColon + 1;
+    if ( !bHasPrefix || !bHasLocalPart )
+        return aName;
+
+    return getNamespaceValue( aName.subView( 0, nColon ) ) +
+        ":" + aName.subView( nColon + 1 );
+}
+
+/** Fails the current test unless the element name is one of the expected
+    qualified names and also resolves to one of the expected
+    "<namespace URI>:local" names.
+ */
+void SAL_CALL NSDocumentHandler::startElement( const OUString& aName, const Reference< XAttributeList >&/* xAttribs */ )
+{
+    // 1. the raw qualified name must be one of the known ones
+    const bool bKnownQName =
+        aName == "office:document" || aName == "office:body" || aName == "office:text" ||
+        aName == "text:p" || aName == "note:p";
+    if ( !bKnownQName )
+        CPPUNIT_ASSERT(false);
+
+    // 2. with the prefix resolved it must be one of the known expanded names
+    const OUString sExpanded = resolveNamespace(aName);
+    const bool bKnownExpanded =
+        sExpanded == "urn:oasis:names:tc:opendocument:xmlns:office:1.0:document" ||
+        sExpanded == "urn:oasis:names:tc:opendocument:xmlns:office:1.0:body" ||
+        sExpanded == "urn:oasis:names:tc:opendocument:xmlns:office:1.0:text" ||
+        sExpanded == "urn:oasis:names:tc:opendocument:xmlns:text:1.0:p";
+    if ( !bKnownExpanded )
+        CPPUNIT_ASSERT(false);
+}
+
+// Minimal token handler for the tests, backed by three fixed lookup tables.
+class DummyTokenHandler : public sax_fastparser::FastTokenHandlerBase
+{
+public:
+ // element/attribute names; a name's token is its index in this table
+ const static std::string_view tokens[];
+ // namespace URIs
+ const static std::u16string_view namespaceURIs[];
+ // prefixes, looked up by getUTF8Identifier() for namespace tokens
+ const static std::string_view namespacePrefixes[];
+
+ // XFastTokenHandler
+ virtual Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) override;
+ virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier ) override;
+ //FastTokenHandlerBase
+ virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override;
+};
+
+// Known element/attribute names. getTokenDirect() returns the index of a
+// name in this table as its token.
+const std::string_view DummyTokenHandler::tokens[] = {
+ "Signature", "CanonicalizationMethod",
+ "Algorithm", "Type",
+ "DigestMethod", "Reference",
+ "document", "spacing",
+ "Player", "Height" };
+
+// Known namespace URIs.
+const std::u16string_view DummyTokenHandler::namespaceURIs[] = {
+ u"http://www.w3.org/2000/09/xmldsig#",
+ u"http://schemas.openxmlformats.org/wordprocessingml/2006/main/",
+ u"xyzsports.com/players/football/" };
+
+// Prefix strings; getUTF8Identifier() indexes this table with
+// (nToken >> 16) - 1. The table has the same number of entries as
+// namespaceURIs.
+const std::string_view DummyTokenHandler::namespacePrefixes[] = {
+ "",
+ "w",
+ "Player" };
+
+/** Returns the UTF-8 text that belongs to a token.
+
+    If any of the upper 16 bits is set, the token is treated as a namespace
+    token and the prefix at index (nToken >> 16) - 1 is returned. Otherwise
+    the lower 16 bits index the name table. Out-of-range indices yield an
+    empty sequence.
+ */
+Sequence< sal_Int8 > DummyTokenHandler::getUTF8Identifier( sal_Int32 nToken )
+{
+    std::string_view aIdentifier; // stays empty for unknown tokens
+
+    const bool bIsNamespaceToken = ( nToken & 0xffff0000 ) != 0;
+    if ( !bIsNamespaceToken )
+    {
+        // element or attribute
+        const size_t nNameIndex = nToken & 0xffff;
+        if ( nNameIndex < std::size(tokens) )
+            aIdentifier = tokens[ nNameIndex ];
+    }
+    else
+    {
+        // namespace
+        const sal_uInt32 nPrefixIndex = ( nToken >> 16 ) - 1;
+        if ( nPrefixIndex < std::size(namespacePrefixes) )
+            aIdentifier = namespacePrefixes[ nPrefixIndex ];
+    }
+
+    return Sequence< sal_Int8 >( reinterpret_cast< const sal_Int8* >(
+        aIdentifier.data() ), aIdentifier.size() );
+}
+
+/// Looks up a token from a byte sequence by forwarding to getTokenDirect().
+sal_Int32 DummyTokenHandler::getTokenFromUTF8( const uno::Sequence< sal_Int8 >& rIdentifier )
+{
+    const char* const pBytes = reinterpret_cast< const char* >( rIdentifier.getConstArray() );
+    const sal_Int32 nByteCount = rIdentifier.getLength();
+    return getTokenDirect( pBytes, nByteCount );
+}
+
+/** Returns the index of the given name in the token table.
+
+    @param pToken   start of the name (not necessarily NUL-terminated)
+    @param nLength  number of bytes in the name
+    @return the table index, or FastToken::DONTKNOW if the name is not listed
+ */
+sal_Int32 DummyTokenHandler::getTokenDirect( const char* pToken, sal_Int32 nLength ) const
+{
+    const std::string_view aWanted( pToken, nLength );
+
+    sal_Int32 nIndex = 0;
+    for ( const std::string_view& rCandidate : tokens )
+    {
+        if ( rCandidate == aWanted )
+            return nIndex;
+        ++nIndex;
+    }
+    return FastToken::DONTKNOW;
+}
+
+
+// CppUnit fixture that feeds the same XML files to two XParser
+// implementations and compares what they report to the document handler.
+class XMLImportTest : public test::BootstrapFixture
+{
+private:
+ // directory containing the XML test files, set in setUp()
+ OUString m_sDirPath;
+ // handler shared by both parsers; collects the events as one string
+ rtl::Reference< TestDocumentHandler > m_xDocumentHandler;
+ // parser obtained from Parser::create()
+ Reference< XParser > m_xParser;
+ // instance of the "com.sun.star.xml.sax.LegacyFastParser" service
+ Reference< XParser > m_xLegacyFastParser;
+
+public:
+ virtual void setUp() override;
+
+ // NOTE(review): meaning of the two BootstrapFixture flags is not visible here
+ XMLImportTest() : BootstrapFixture(true, false) {}
+ void parse();
+ void testMissingNamespaceDeclaration();
+ void testIllegalNamespaceUse();
+
+ CPPUNIT_TEST_SUITE( XMLImportTest );
+ CPPUNIT_TEST( parse );
+ CPPUNIT_TEST( testMissingNamespaceDeclaration );
+ CPPUNIT_TEST( testIllegalNamespaceUse );
+ CPPUNIT_TEST_SUITE_END();
+};
+
+/** Creates both parsers, attaches the shared document handler, and
+    initialises the legacy fast parser with the dummy token handler and the
+    namespace table.
+ */
+void XMLImportTest::setUp()
+{
+    test::BootstrapFixture::setUp();
+    Reference< XComponentContext > xContext = comphelper::getProcessComponentContext();
+
+    // Handler shared by both parsers.
+    m_xDocumentHandler.set( new TestDocumentHandler() );
+
+    m_xParser = Parser::create( xContext );
+    m_xParser->setDocumentHandler( m_xDocumentHandler );
+
+    m_xLegacyFastParser.set( xContext->getServiceManager()->createInstanceWithContext
+                    ( "com.sun.star.xml.sax.LegacyFastParser", xContext ), UNO_QUERY );
+    m_xLegacyFastParser->setDocumentHandler( m_xDocumentHandler );
+
+    // First initialize() call: hand over the token handler.
+    Reference< XFastTokenHandler > xTokenHandler( new DummyTokenHandler );
+    uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser,
+                            uno::UNO_QUERY_THROW);
+    xInit->initialize({ uno::Any(xTokenHandler) });
+
+    // Second initialize() call: "registerNamespaces" followed by one
+    // (URI, token) pair per namespace; the token of the n-th URI (counting
+    // from 1) is n << 16.
+    const sal_Int32 nNamespaceCount = static_cast<sal_Int32>( std::size(DummyTokenHandler::namespaceURIs) );
+    uno::Sequence<uno::Any> aNamespaceArgs( nNamespaceCount + 1 );
+    uno::Any* const pArgs = aNamespaceArgs.getArray();
+    pArgs[0] <<= OUString( "registerNamespaces" );
+    for (sal_Int32 nIndex = 0; nIndex < nNamespaceCount; ++nIndex)
+    {
+        const sal_Int32 nOrdinal = nIndex + 1;
+        css::beans::Pair<OUString, sal_Int32> aPair( OUString(DummyTokenHandler::namespaceURIs[nIndex]), nOrdinal << 16 );
+        pArgs[nOrdinal] <<= aPair;
+    }
+    xInit->initialize( aNamespaceArgs );
+
+    m_sDirPath = m_directories.getPathFromSrc( u"/sax/qa/data/" );
+}
+
+/** Parses each test file with both parsers and expects the shared document
+    handler to have collected identical text both times.
+ */
+void XMLImportTest::parse()
+{
+    const OUString aTestFiles[] = {"simple.xml", "defaultns.xml", "inlinens.xml",
+                                   "multiplens.xml", "multiplepfx.xml",
+                                   "nstoattributes.xml", "nestedns.xml", "testthreading.xml"};
+
+    for (const OUString& rFileName : aTestFiles)
+    {
+        const OUString sFilePath = m_sDirPath + rFileName;
+
+        InputSource aSource;
+        aSource.sSystemId = "internal";
+
+        // The results are copied on purpose: getString() returns a reference
+        // to the handler's buffer, which startDocument() clears on the next
+        // parse.
+        aSource.aInputStream = createStreamFromFile( sFilePath );
+        m_xParser->parseStream( aSource );
+        const OUString sParserResult = m_xDocumentHandler->getString();
+
+        aSource.aInputStream = createStreamFromFile( sFilePath );
+        m_xLegacyFastParser->parseStream( aSource );
+        const OUString sLegacyResult = m_xDocumentHandler->getString();
+
+        CPPUNIT_ASSERT_EQUAL( sParserResult, sLegacyResult );
+    }
+}
+
+/** Initialises the legacy fast parser with "IgnoreMissingNSDecl" and then
+    compares both parsers on a manifest with and one without the namespace
+    declaration.
+
+    A SAXException thrown while handling a file is swallowed, which ends
+    the comparison for that file without failing the test.
+ */
+void XMLImportTest::testMissingNamespaceDeclaration()
+{
+    const OUString aTestFiles[] = { "manifestwithnsdecl.xml", "manifestwithoutnsdecl.xml" };
+
+    uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser,
+                            uno::UNO_QUERY_THROW);
+    xInit->initialize({ uno::Any(OUString("IgnoreMissingNSDecl")) });
+
+    for (const OUString& rFileName : aTestFiles)
+    {
+        try
+        {
+            const OUString sFilePath = m_sDirPath + rFileName;
+
+            InputSource aSource;
+            aSource.sSystemId = "internal";
+
+            // Copies are required: the handler's buffer is reset by the
+            // next parse.
+            aSource.aInputStream = createStreamFromFile( sFilePath );
+            m_xParser->parseStream( aSource );
+            const OUString sParserResult = m_xDocumentHandler->getString();
+
+            aSource.aInputStream = createStreamFromFile( sFilePath );
+            m_xLegacyFastParser->parseStream( aSource );
+            const OUString sLegacyResult = m_xDocumentHandler->getString();
+
+            CPPUNIT_ASSERT_EQUAL( sParserResult, sLegacyResult );
+        }
+        catch( const SAXException & )
+        {
+            // deliberately ignored
+        }
+    }
+}
+
+/** Runs multiplepfx.xml through both parsers with an NSDocumentHandler
+    attached; the handler itself fails the test when it sees an unexpected
+    element name.
+ */
+void XMLImportTest::testIllegalNamespaceUse()
+{
+    const rtl::Reference< NSDocumentHandler > xNSHandler( new NSDocumentHandler() );
+    const OUString sFilePath = m_sDirPath + "multiplepfx.xml";
+
+    InputSource aSource;
+    aSource.sSystemId = "internal";
+
+    // first parser
+    m_xParser->setDocumentHandler( xNSHandler );
+    aSource.aInputStream = createStreamFromFile( sFilePath );
+    m_xParser->parseStream( aSource );
+
+    // legacy fast parser
+    m_xLegacyFastParser->setDocumentHandler( xNSHandler );
+    aSource.aInputStream = createStreamFromFile( sFilePath );
+    m_xLegacyFastParser->parseStream( aSource );
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION( XMLImportTest );
+} //namespace
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/qa/data/defaultns.xml b/sax/qa/data/defaultns.xml
new file mode 100644
index 000000000..2e7819b16
--- /dev/null
+++ b/sax/qa/data/defaultns.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" ?>
+<Books xmlns="http://xyzbooks.com/books/">
+ <Book>
+ <Title>War and Peace</Title>
+ <Author>Leo Tolstoy</Author>
+ </Book>
+ <Book>
+ <Title>To Kill a Mockingbird</Title>
+ <Author>Harper Lee</Author>
+ </Book>
+</Books>
diff --git a/sax/qa/data/inlinens.xml b/sax/qa/data/inlinens.xml
new file mode 100644
index 000000000..02c421408
--- /dev/null
+++ b/sax/qa/data/inlinens.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" ?>
+<Students xmlns="http://xyzuniversity.org/student/">
+ <Student xmlns:ug="http://xyzuniversity.org/student/ug/">
+ <Name>ABC</Name>
+ <ug:Branch>Computer Science</ug:Branch>
+ <ug:Grade>7.9</ug:Grade>
+ </Student>
+ <Student xmlns:pg="http://xyzuniversity.org/student/pg/">
+ <Name>PQR</Name>
+ <pg:Field>Artificial Intelligence</pg:Field>
+ </Student>
+</Students> \ No newline at end of file
diff --git a/sax/qa/data/manifestwithnsdecl.xml b/sax/qa/data/manifestwithnsdecl.xml
new file mode 100644
index 000000000..ac61c3e20
--- /dev/null
+++ b/sax/qa/data/manifestwithnsdecl.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" manifest:version="1.2">
+ <manifest:file-entry manifest:full-path="/" manifest:version="1.2" manifest:media-type="application/vnd.oasis.opendocument.text"/>
+ <manifest:file-entry manifest:full-path="Thumbnails/thumbnail.png" manifest:media-type="image/png"/>
+ <manifest:file-entry manifest:full-path="content.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="styles.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="meta.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="settings.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="Configurations2/accelerator/current.xml" manifest:media-type=""/>
+ <manifest:file-entry manifest:full-path="Configurations2/" manifest:media-type="application/vnd.sun.xml.ui.configuration"/>
+ <manifest:file-entry manifest:full-path="manifest.rdf" manifest:media-type="application/rdf+xml"/>
+</manifest:manifest>
diff --git a/sax/qa/data/manifestwithoutnsdecl.xml b/sax/qa/data/manifestwithoutnsdecl.xml
new file mode 100644
index 000000000..1c8f53596
--- /dev/null
+++ b/sax/qa/data/manifestwithoutnsdecl.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest:manifest>
+ <manifest:file-entry manifest:full-path="/" manifest:version="1.2" manifest:media-type="application/vnd.oasis.opendocument.text"/>
+ <manifest:file-entry manifest:full-path="Thumbnails/thumbnail.png" manifest:media-type="image/png"/>
+ <manifest:file-entry manifest:full-path="content.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="styles.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="meta.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="settings.xml" manifest:media-type="text/xml"/>
+ <manifest:file-entry manifest:full-path="Configurations2/accelerator/current.xml" manifest:media-type=""/>
+ <manifest:file-entry manifest:full-path="Configurations2/" manifest:media-type="application/vnd.sun.xml.ui.configuration"/>
+ <manifest:file-entry manifest:full-path="manifest.rdf" manifest:media-type="application/rdf+xml"/>
+</manifest:manifest>
diff --git a/sax/qa/data/multiplens.xml b/sax/qa/data/multiplens.xml
new file mode 100644
index 000000000..e1dc4ce01
--- /dev/null
+++ b/sax/qa/data/multiplens.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" ?>
+<Athletes>
+ <Player xmlns:Player="xyzsports.com/players/football/">
+ <Player:Name>Lionel Messi</Player:Name>
+ <Player:Height>1.70 m</Player:Height>
+ <Player:Position>Forward</Player:Position>
+ </Player>
+ <Player xmlns:Player="xyzsports.com/players/Cricket/">
+ <Player:Name>Sachin Ramesh Tendulkar</Player:Name>
+ <Player:Height>165 cm</Player:Height>
+ <Player:Style>Right handed</Player:Style>
+ </Player>
+</Athletes> \ No newline at end of file
diff --git a/sax/qa/data/multiplepfx.xml b/sax/qa/data/multiplepfx.xml
new file mode 100644
index 000000000..b7686cad5
--- /dev/null
+++ b/sax/qa/data/multiplepfx.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" ?>
+<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0">
+ <office:body>
+ <office:text>
+ <text:p xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" text:style-name="Title">Test Document </text:p>
+ <note:p xmlns:note="urn:oasis:names:tc:opendocument:xmlns:text:1.0" note:style-name="Heading">For testing purposes only</note:p>
+ </office:text>
+ </office:body>
+</office:document>
diff --git a/sax/qa/data/nestedns.xml b/sax/qa/data/nestedns.xml
new file mode 100644
index 000000000..566332b40
--- /dev/null
+++ b/sax/qa/data/nestedns.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" ?>
+<?pi-target pi-data?>
+<Elements>
+ <Book xmlns:lib="http://www.library.com/">
+ <lib:Title>Sherlock Holmes - I</lib:Title>
+ <lib:Author>Arthur Conan Doyle</lib:Author>
+ <purchase xmlns:lib="http://www.otherlibrary.com/">
+ <lib:Title>Sherlock Holmes - II</lib:Title>
+ <lib:Author>Arthur Conan Doyle</lib:Author>
+ </purchase>
+ <lib:Title>Sherlock Holmes - III</lib:Title>
+ <lib:Author>Arthur Conan Doyle</lib:Author>
+ </Book>
+ <Electronics xmlns="http://doesntexist.com/electronics/">
+ <item>
+ <Name>Apple iPhone 6s</Name>
+ <?pi-target-only?>
+ <Price>$324</Price>
+ </item>
+ <item xmlns="http://doesntexist.com/dailyuse/">
+ <Name>Philips Aqua Touch Shaver</Name>
+ <item xmlns="http://doesntexist.com/dailyuse/model/">
+ <Model>AT890</Model>
+ <Price>$74</Price>
+ </item>
+ </item>
+ <item>
+ <Name>Macbook Pro</Name>
+ <Price>$500</Price>
+ </item>
+ </Electronics>
+</Elements> \ No newline at end of file
diff --git a/sax/qa/data/nstoattributes.xml b/sax/qa/data/nstoattributes.xml
new file mode 100644
index 000000000..dee2edfdc
--- /dev/null
+++ b/sax/qa/data/nstoattributes.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" ?>
+<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main/">
+ <w:body>
+ <w:p w:rsidR="009A1A3D" w:rsidRPr="00BA52A7" w:rsidRDefault="00225691" w:rsidP="00BA52A7">
+ <w:pPr xyz="abc">
+ <w:spacing w:line="276" w:lineRule="auto"/>
+ <w:rPr>
+ <w:rFonts w:asciiTheme="minorHAnsi" w:hAnsiTheme="minorHAnsi"/>
+ <w:sz w:val="24" val="27" />
+ <w:szCs w:val="24"/>
+ </w:rPr>
+ </w:pPr>
+ <w:bookmarkStart w:id="0" w:name="page1"/>
+ <w:bookmarkEnd w:id="0"/>
+ </w:p>
+ </w:body>
+</w:document> \ No newline at end of file
diff --git a/sax/qa/data/simple.xml b/sax/qa/data/simple.xml
new file mode 100644
index 000000000..67c4fbde5
--- /dev/null
+++ b/sax/qa/data/simple.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" ?>
+<TVActors>
+ <Actor>
+ <Name>Bryan Cranston</Name>
+ <Show>Breaking Bad</Show>
+ </Actor>
+ <Actor>
+ <Name>Peter Dinklage</Name>
+ <Show>Game of Thrones</Show>
+ </Actor>
+</TVActors> \ No newline at end of file
diff --git a/sax/qa/data/testthreading.xml b/sax/qa/data/testthreading.xml
new file mode 100644
index 000000000..0d05fd995
--- /dev/null
+++ b/sax/qa/data/testthreading.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?><Signature xmlns="http://www.w3.org/2000/09/xmldsig#" Id="idPackageSignature"><SignedInfo><CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/><SignatureMethod Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256"/><Reference Type="http://www.w3.org/2000/09/xmldsig#Object" URI="#idPackageObject"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>NOjlH6v6g2ojDvQoi4M5B8Bght0y3ES4fjxlRk2xtVE=</DigestValue></Reference><Reference Type="http://www.w3.org/2000/09/xmldsig#Object" URI="#idOfficeObject"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>mq1H1GLrTZSuUUTqX5HpjytFwl8nJFggNsXJUgQZT0U=</DigestValue></Reference><Reference Type="http://uri.etsi.org/01903#SignedProperties" URI="#idSignedProperties"><Transforms><Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>6jeT5n6jeqOspz6K6BIgitwfLZf4s1/uS9dPdOQRF8s=</DigestValue></Reference></SignedInfo><SignatureValue>UZwOWZbOm9ysRa+eYwoksUW8d+jBFPOkqOtfTvT8waHWDe3siDc4N79W6jPCYMIHMtPQjCb4qMDe
+dhCYvg6TTWFWCvU/br+A1Qo0xovWHD3DwB29qk7NDBfbnEIPxbOe2D70cZa86Zl2MgW5YqQlgRFH
+g6+XxwBUp+ZYx4knFWEg8zgbe3JnV7zeI/RG/1iq9TYH1GUBloF10df4qaulrp2AUkdSvnnUcxRP
+ZfbS+14YxUHkW0UTyV+6ZeXqtTnXS0F/LG5JH2/xkN+mgwLB6TPfxtQD6vcj+Tdnf0hHlnuOmvBS
+L7Pn+zwS0ueMOTxIozcaYPsmJc9fVpEU2I59PA==</SignatureValue><KeyInfo><X509Data><X509Certificate>MIIE7jCCAtagAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwVzELMAkGA1UEBhMCVUsxEDAOBgNVBAgMB0VuZ2xhbmQxEjAQBgNVBAoMCVRTQ1AgVGVzdDEiMCAGA1UEAwwZVFNDUCBJbnRlcm1lZGlhdGUgUm9vdCBDQTAeFw0xNTEyMTgwNzU4MTlaFw0xNjEyMjcwNzU4MTlaMFUxCzAJBgNVBAYTAlVLMRAwDgYDVQQIDAdFbmdsYW5kMRIwEAYDVQQKDAlUU0NQIFRlc3QxIDAeBgNVBAMMF1RTQ1AgVGVzdCBleGFtcGxlIEFsaWNlMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA3m2YNdX+nc1LkhlrNrcIPI3yCWnv0/0k9zDKpKiwjMH4vjWM46M6ptAiupxVpAMW5ojnhEyxaNHvZNsCwddYE6778hut2SJvz0szSBuHUuedcALI2EhVwdM0yLqfGo6WGeOIBDId49TemdNCMhk2zOpb1BqYhKls0LfdbxT/an3JaDmmLhPjvgYMJNYVX86L199OQFLJ1zLqQ0YirkKqXL9cSPmyYBKjgnqQ4Z5YfPL63EP0TsEfa5oQmy/0gS5FB2Wz9CqIptB130v0GR4XObTpOkhPFfC5RDBFTMZoi4NCK10wn2NCbr7qZ3aMrOlfeKbsNIifwu0KYFHXyxL5AwIDAQABo4HFMIHCMAkGA1UdEwQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMDMGCWCGSAGG+EIBDQQmFiRPcGVuU1NMIEdlbmVyYXRlZCBDbGllbnQgQ2VydGlmaWNhdGUwHQYDVR0OBBYEFCL6DzsuAbni8475Z+HkX5tv8iiWMB8GA1UdIwQYMBaAFMuejS1rWjUf3x1+2QbPSVpuXFl+MA4GA1UdDwEB/wQEAwIF4DAdBgNVHSUEFjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwDQYJKoZIhvcNAQELBQADggIBAFs0DeCDjttHQ0UHsYcnhfBCWRdOFdIr3F/IEbN2BL/grScGXoXRaYMIQJv/s5dKgZIuH7xMCVKazoftPVqU4bOEduAv0IJ6hQF/wEMBueA0UjvQQVYZgsOALi7TD3gYpFqYcH2Wfx5/5Ln6dllL8UsHoP+6gSLaYwjJd7FQ+IlNTzR65dRMLoJhoKqqyuM6cf/PM8sbK2NH2r8toypjfPixvD/w3wP7xn4oo/IGXcRK4DTHBF/rSMqeR6ePwXm5tVHrQBfnxN3dsGsXkQgqzBvvbPY0raraO4CPR7mZp4GVFHOsUNh5TI1SlfxWZ49HU3F5jWeiI9jPuw1RmuAyZdFEt403Wi67v6revXe1By6UqIZjq3b2pJGBKZH+60P1cJScawzrN8pi1qQFV8JiiJM6/MSciqplTT5F7SG0XZx1CjnBz5rMdYNhI9NNtF3oy9Xy9RvgYehFaC43ZlBBUMDmZFj5a78hOOkkq1UnrHUdeXyWhiEFzv5d8My2i0kWGq8r0HuC25BmOa17lHVxQ2o7Rdu9jDFP9oNizC7kQfA5QVRTfBFcWH7jml69RmVgfM+X+wdQgen9hJAILhBzmDfeteJ5ZEaoEYtw3isOGkpSyg7odjgYq7I+bOiN1toDg07vzfIkvF9KxlkDeRLXbmcFIvQsqFeF6cUwlZQYLOHA</X509Certificate></X509Data></KeyInfo><Object Id="idPackageObject"><Manifest><Reference URI="/_rels/.rels?ContentType=application/vnd.openxmlformats-package.relationships+xml"><Transforms><Transform 
Algorithm="http://schemas.openxmlformats.org/package/2006/RelationshipTransform"><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId1"/></Transform><Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>Mq3mDDWudLiaQFa1psBgLG+/en7p7r8re0MtlxnuiUI=</DigestValue></Reference><Reference URI="/word/_rels/document.xml.rels?ContentType=application/vnd.openxmlformats-package.relationships+xml"><Transforms><Transform Algorithm="http://schemas.openxmlformats.org/package/2006/RelationshipTransform"><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId2"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId1"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId5"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId4"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId3"/></Transform><Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>fLpr0+BDSSPPgsv2eBghgw8hu/vi7IslCQuCSKGf2X4=</DigestValue></Reference><Reference URI="/word/document.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>7Hrf+Oz9oMcqn7nOKgy1P39L313r8SO/pT3wQpVwq5k=</DigestValue></Reference><Reference URI="/word/fontTable.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"><DigestMethod 
Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>VZENondrzBVuF5GsYtsdSYgGNQS11L4XQ7vYiTn5PBE=</DigestValue></Reference><Reference URI="/word/settings.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>nYb+Wnf/ttBtq0PSR33tt+yKzvx6RdYmtEhD3XnBwaI=</DigestValue></Reference><Reference URI="/word/styles.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>4W+Exf0d9q8aHyBJ94YvTqRgdlhjR2r7F+eO3udy01s=</DigestValue></Reference><Reference URI="/word/theme/theme1.xml?ContentType=application/vnd.openxmlformats-officedocument.theme+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>L8HrIbYZUORASW6Jbsljhmie2tLKO9ld8ME0syr+ZDE=</DigestValue></Reference><Reference URI="/word/webSettings.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>8JastnM5t30OLdmv2PyPNOe1YxAXc/Qz0O1UCFnWyxM=</DigestValue></Reference></Manifest><SignatureProperties><SignatureProperty Id="idSignatureTime" Target="#idPackageSignature"><mdssi:SignatureTime xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature"><mdssi:Format>YYYY-MM-DDThh:mm:ssTZD</mdssi:Format><mdssi:Value>2016-01-11T15:32:02Z</mdssi:Value></mdssi:SignatureTime></SignatureProperty></SignatureProperties></Object><Object Id="idOfficeObject"><SignatureProperties><SignatureProperty Id="idOfficeV1Details" Target="#idPackageSignature"><SignatureInfoV1 
xmlns="http://schemas.microsoft.com/office/2006/digsig"><SetupID></SetupID><SignatureText></SignatureText><SignatureImage/><SignatureComments>purpose</SignatureComments><WindowsVersion>6.1</WindowsVersion><OfficeVersion>16.0</OfficeVersion><ApplicationVersion>16.0</ApplicationVersion><Monitors>1</Monitors><HorizontalResolution>1280</HorizontalResolution><VerticalResolution>800</VerticalResolution><ColorDepth>32</ColorDepth><SignatureProviderId>{00000000-0000-0000-0000-000000000000}</SignatureProviderId><SignatureProviderUrl></SignatureProviderUrl><SignatureProviderDetails>9</SignatureProviderDetails><SignatureType>1</SignatureType></SignatureInfoV1></SignatureProperty></SignatureProperties></Object><Object><xd:QualifyingProperties xmlns:xd="http://uri.etsi.org/01903/v1.3.2#" Target="#idPackageSignature"><xd:SignedProperties Id="idSignedProperties"><xd:SignedSignatureProperties><xd:SigningTime>2016-01-11T15:32:02Z</xd:SigningTime><xd:SigningCertificate><xd:Cert><xd:CertDigest><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>VRk3WQgpNIAnRxA9txzm7L2v1Iuwu2WrHpwa5WNTKV0=</DigestValue></xd:CertDigest><xd:IssuerSerial><X509IssuerName>CN=TSCP Intermediate Root CA, O=TSCP Test, S=England, 
C=UK</X509IssuerName><X509SerialNumber>4096</X509SerialNumber></xd:IssuerSerial></xd:Cert></xd:SigningCertificate><xd:SignaturePolicyIdentifier><xd:SignaturePolicyImplied/></xd:SignaturePolicyIdentifier></xd:SignedSignatureProperties></xd:SignedProperties><xd:UnsignedProperties><xd:UnsignedSignatureProperties><xd:CertificateValues><xd:EncapsulatedX509Certificate>MIIFiDCCA3CgAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwTzELMAkGA1UEBhMCVUsxEDAOBgNVBAgMB0VuZ2xhbmQxEjAQBgNVBAoMCVRTQ1AgVGVzdDEaMBgGA1UEAwwRVFNDUCBUZXN0IFJvb3QgQ0EwHhcNMTUxMjE4MDc1ODE5WhcNMjUxMjE1MDc1ODE5WjBXMQswCQYDVQQGEwJVSzEQMA4GA1UECAwHRW5nbGFuZDESMBAGA1UECgwJVFNDUCBUZXN0MSIwIAYDVQQDDBlUU0NQIEludGVybWVkaWF0ZSBSb290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAz7cet97WutWedG8L4cU7vjXkkzc8PkXnuvoF6ltIbDbnt2jt+0JH8xjGpfMVF+si0RPMwNQDmrTSj6SCpBcFV11hgyLXeovO17cSfbUYhl53ghyY0w/J3We19JJNeXtXlgZKrzre1gaXPxUG6Q4/TGo/gh9DmqEWjGkL3E6FboNfrIXHvOyhYT0kuTJkp8uqbBhhb/jO/FUV2DvTdoX04YZlFsheP00FHYz5ge6e3tG/WSeabTCUOsqq4+1jFOGNapvBrMkYC4QJwOc4bjIP8UmMlHqJae2mQVTVK3hiXtdMuS/H7rM/ZzCzcNQ2kyXUr843oul2FZ9UK3HI0TVrBFi0mpcWtvo/PMLNNXHJ/2MVZcHVgQ5+uBk0/Zq1vrqmxEev5t9lkKIzXRdztKP/EuBEjSvtcx+KZiHei+CvN+5nn5D0A6BfMYiQKmr2lVkCDs5v++iLINGbf54UoeM/dv2VEaC9bcdPeR8JHcvzS1f/cjHAbmnbMUumX985jha9SJavXkgWU6LgL5+txGD2X4Sb/+cf4ver1Zd1QGxqZ80wwK/qRUFGHWehcjCKAzeqnMRTJLUeNH5TcnvYv5fRq9wDMLzcjXXHaKEE+n3xt4fyCdEn5AFWeCPenqW3iZpEJTOHXm65Gj33/OTqoMLj8Mas3shoES1tL7LqQu/ZhgMCAwEAAaNmMGQwHQYDVR0OBBYEFMuejS1rWjUf3x1+2QbPSVpuXFl+MB8GA1UdIwQYMBaAFBtcCsdShJelvNJRLLHhVOy17wRXMBIGA1UdEwEB/wQIMAYBAf8CAQAwDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEBCwUAA4ICAQAzYDupn8D+Vh5NYcyiufM86F7GqmQ8Rows5R9o1c/5iax4G95Ley5EjUXo7/Hq2JSmtGVwkmlCBKduxsWmkbcBCJwr5AFX4TY8QFgTwKm4+IDlDA+Qs5m5bFKwbpJ8+oakz2L4j032hh6pRlezYh0P6ciVrTUNIFdR4GLusV8ronHa2AIJy9OIihI8zwDvT5rlPtDVs/wPiSq5+qcM/wnKo8X1JYU/tM1w6xvge0WNIf2yzedl2jZbbQm6wmCioCMZ7nUyyywC7WYFgCgOwOKfEa7pWhwCXpWt4MetNzXSpumurhrmn7B8y6NNarHNMhB9xv4Do4VwezMRCydnOGkl/B2fMMSoS3hxItJWDzEMkD51M6uk3yGnrQnApfEcGhEUTE8WvR8Il+Od4qFX7r50A5LguHXc4EshDJU4IaZ
EcYvOu91Xh2vsIZU72CXUELqMwJB7NjAkVwv8dQzHbKnK4E6y0zO1dHsjvwBfJl56PNRYx9fxJoBqdK4VrVVZAQ+wQ7wrvbjF2p5EnciHQS1NmLJjNeVvCY1dprYjp9mDxBFNNm4DhXYHF1TXpDqS7nC5cdJlTtdg4LxI4isMY/R1plDq2oxwaxhd1+5CxtMTsuv1A2qFvaRLUNS8SQE6PjWz1NxdczS5aBrXBfBFhxkEP3AtEbAyv0HiHidUAcq4pw==</xd:EncapsulatedX509Certificate><xd:EncapsulatedX509Certificate>MIIFhDCCA2ygAwIBAgIJAIc74NSdmtB4MA0GCSqGSIb3DQEBCwUAME8xCzAJBgNVBAYTAlVLMRAwDgYDVQQIDAdFbmdsYW5kMRIwEAYDVQQKDAlUU0NQIFRlc3QxGjAYBgNVBAMMEVRTQ1AgVGVzdCBSb290IENBMB4XDTE1MTIxODA3NTgxOFoXDTM1MTIxMzA3NTgxOFowTzELMAkGA1UEBhMCVUsxEDAOBgNVBAgMB0VuZ2xhbmQxEjAQBgNVBAoMCVRTQ1AgVGVzdDEaMBgGA1UEAwwRVFNDUCBUZXN0IFJvb3QgQ0EwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCwJHjSIV2YE31STJk/bvGrTY5O949l2TnA0nt3nlIC+oe2O+dLnMIwZsSvZwZdGMjvc25AoQHJAiX8zeq/0AVMz9mhm9y239ziJBnSdP4eLBEntPFtZZooFQUV9uXHejIJs3czZmQsOBz9Ko5cL3fC0qZYXeNL14MGpAWQ0EbEMaz0uDSz3BCcJBLSgXmCEBCbXNP7mCt9vWoa0nE8HQUVmiB+SL9ltCLUojVEB9EsssH1Wo3rFR03Wk9ZbMcmmn8Av1ZLexFDD2TKhGcNHNfguB5rf+Sc/Vt45pSPemE60ro1ej4n/wwpyFM+5w6rOYqZTaBiRwzgTwYoS1JcgKnu0ACIxYqpIhxeBbNT6rA45bBvTQOCNdUWALyRkTiLYiX5HKbrrhg2ZsmX62GwEPtm+okJIQLmQp7nLfQMDONrETrBnj/D2aUo6LuwtONBD35c/hLKTMVCDIBOS35Js55GJr2Y+dhG5ly0dKoMnu0fXScrEYQzjJ1Pbv7zvpO7RBV0El8Qg8AfR2ZAD/UlugRnEDCyVhAXz8g333r0whz3LvacHTwMJoMXFXt9yVnt3ivrZ2NGLcvSNx2ZjBU7Y2EWJoEKGl49+idqmHK1Equ3dM+FNiCNX2PCEL3DGs8GpuzFFM9d9q863xCCNRecgG8rrQKSo328Q656g3iSUqFcCwIDAQABo2MwYTAdBgNVHQ4EFgQUG1wKx1KEl6W80lEsseFU7LXvBFcwHwYDVR0jBBgwFoAUG1wKx1KEl6W80lEsseFU7LXvBFcwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBADNoTWI0fdkdQW8/knVy+pEYh4y4f/9rit55MznmMsuOTHv9svNH9AMw/ZBvsfu4fEcXQyhmeonRMPdsz26ZigxG4k2wcN9fV2VMI3MlIbefCJZhIS89c3kyBF5MhbnaWxvP95nfa41cfbsXnSrj31VNMSXiA6YegbiL/v/0IkUd1mwXcXTcyxbkMLuTORgA6WiPpHVMN//YnQSMWa9ukh0uHsAuoDc2NvteXZsQkpOdZdJB4pIL2t68agyGy1Wv78jiaoxVbfYL4T3TJMxIGFSZFoV+1SYmVsfPxrSQ7vopT6y61r9c/b2fTPaHO+22pFx8lNsHII5kXpWQZIpRHgqjydN/VtnaC6dKq1lPvQTplzQVfCNWGaa/BddinGNV2qwo2a1QnEv7/6t1Gtxs5rte31aCNkfIx/mThk10fMmwJK9ECWKT/+X5iWpydI3zBIE+OvO0MKpaOeVVz1Jehk
ZxGWP+qduF8lgL7Hs4osQNbQIu41twarpSjVCEm/FLVqo8wLmTi2Y5a7QTANeNLdyAKxjTJn3uuAvVYUUHzKOXgKF/X0tCZrUsH3//2MW3nqenN0ldXIzf7OjnVcbv2iKUzqYFzJHYtAuWHbew/kj2TvWeFFzdigLYjfnfZvHQ5sPxwrx6YpDeNf8inj48oEj6Raos0ClF0nmVg2eLMe9f</xd:EncapsulatedX509Certificate></xd:CertificateValues></xd:UnsignedSignatureProperties></xd:UnsignedProperties></xd:QualifyingProperties></Object></Signature> \ No newline at end of file
diff --git a/sax/source/expatwrap/expwrap.component b/sax/source/expatwrap/expwrap.component
new file mode 100644
index 000000000..1f72eccf3
--- /dev/null
+++ b/sax/source/expatwrap/expwrap.component
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ -->
+
+<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@"
+ xmlns="http://openoffice.org/2010/uno-components">
+ <implementation name="com.sun.star.comp.extensions.xml.sax.ParserExpat"
+ constructor="com_sun_star_comp_extensions_xml_sax_ParserExpat_get_implementation">
+ <service name="com.sun.star.xml.sax.Parser"/>
+ </implementation>
+ <implementation name="com.sun.star.extensions.xml.sax.Writer"
+ constructor="com_sun_star_extensions_xml_sax_Writer_get_implementation">
+ <service name="com.sun.star.xml.sax.Writer"/>
+ </implementation>
+ <implementation name="com.sun.star.comp.extensions.xml.sax.FastParser"
+ constructor="com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation">
+ <service name="com.sun.star.xml.sax.FastParser"/>
+ </implementation>
+ <implementation name="com.sun.star.comp.extensions.xml.sax.LegacyFastParser"
+ constructor="com_sun_star_comp_extensions_xml_sax_LegacyFastParser_get_implementation">
+ <service name="com.sun.star.xml.sax.LegacyFastParser"/>
+ </implementation>
+</component>
diff --git a/sax/source/expatwrap/sax_expat.cxx b/sax/source/expatwrap/sax_expat.cxx
new file mode 100644
index 000000000..78b7e2252
--- /dev/null
+++ b/sax/source/expatwrap/sax_expat.cxx
@@ -0,0 +1,961 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <string.h>
+#include <cassert>
+#include <memory>
+#include <mutex>
+#include <utility>
+#include <string_view>
+#include <vector>
+
+
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
+#include <com/sun/star/xml/sax/XParser.hpp>
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+#include <com/sun/star/io/IOException.hpp>
+#include <com/sun/star/io/XSeekable.hpp>
+#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
+
+#include <comphelper/attributelist.hxx>
+#include <cppuhelper/weak.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <rtl/ref.hxx>
+#include <sal/log.hxx>
+
+#include <expat.h>
+
+using namespace ::std;
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::xml::sax;
+using namespace ::com::sun::star::io;
+
+#include <xml2utf.hxx>
+
+namespace {
+
+#define XML_CHAR_TO_OUSTRING(x) OUString(x , strlen( x ), RTL_TEXTENCODING_UTF8) // NUL-terminated UTF-8 char string -> OUString; x is expanded twice, so pass a side-effect-free expression
+#define XML_CHAR_N_TO_USTRING(x,n) OUString(x,n, RTL_TEXTENCODING_UTF8 ) // first n bytes of the UTF-8 buffer x -> OUString (no terminating NUL needed)
+
+
+/*
+* The following macro encapsulates any call to an event handler.
+* It ensures that exceptions thrown by the event handler are
+* treated properly.
+*
+* pThis : pointer to the SaxExpatParser_Impl whose flags and saved
+*         exception are used (the macro touches no other variable).
+* call  : member call expression, evaluated as pThis->call.
+*
+* Behaviour:
+*  - nothing is called once pThis->bExceptionWasThrown is set;
+*  - a SAXParseException is passed to callErrorHandler unchanged;
+*  - a SAXException is converted to a SAXParseException carrying the
+*    public id, system id, line and column of pThis->rDocumentLocator
+*    and then passed to callErrorHandler;
+*  - a css::uno::RuntimeException is stored in pThis->rtexception and
+*    both exception flags are set;
+*  - any other css::uno::Exception is wrapped in a
+*    WrappedTargetRuntimeException, stored the same way, and both flags
+*    are set.
+*
+* The expansion ends in ((void)0), so every use must be followed by ';'.
+*/
+#define CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pThis,call) \
+    if( ! pThis->bExceptionWasThrown ) { \
+        try {\
+            pThis->call;\
+        }\
+        catch( const SAXParseException &e ) {\
+            callErrorHandler( pThis , e );\
+        }\
+        catch( const SAXException &e ) {\
+            callErrorHandler( pThis , SAXParseException(\
+                e.Message, \
+                e.Context, \
+                e.WrappedException,\
+                pThis->rDocumentLocator->getPublicId(),\
+                pThis->rDocumentLocator->getSystemId(),\
+                pThis->rDocumentLocator->getLineNumber(),\
+                pThis->rDocumentLocator->getColumnNumber()\
+                ) );\
+        }\
+        catch( const css::uno::RuntimeException &e ) {\
+            pThis->bExceptionWasThrown = true; \
+            pThis->bRTExceptionWasThrown = true; \
+            pThis->rtexception = e; \
+        }\
+        catch( const css::uno::Exception &e ) {\
+            pThis->bExceptionWasThrown = true; \
+            pThis->bRTExceptionWasThrown = true; \
+            pThis->rtexception = WrappedTargetRuntimeException("Non-runtime UNO exception caught during parse", e.Context, css::uno::Any(e)); \
+        }\
+    }\
+    ((void)0)
+
+
+class SaxExpatParser_Impl;
+
+// UNO-facing parser object: implements XParser, XInitialization and XServiceInfo; all state lives in the SaxExpatParser_Impl owned through m_pImpl
+class SaxExpatParser
+ : public WeakImplHelper< XInitialization
+ , XServiceInfo
+ , XParser >
+{
+
+public:
+ SaxExpatParser(); // creates the Impl, its LocatorImpl and the reusable attribute list
+
+ // css::lang::XInitialization (the visible implementation looks for the string argument "DoSmeplease"):
+ virtual void SAL_CALL initialize(css::uno::Sequence<css::uno::Any> const& rArguments) override;
+
+ // The SAX parser interface (XParser)
+ virtual void SAL_CALL parseStream( const InputSource& structSource) override;
+ virtual void SAL_CALL setDocumentHandler(const css::uno::Reference< XDocumentHandler > & xHandler) override;
+
+ virtual void SAL_CALL setErrorHandler(const css::uno::Reference< XErrorHandler > & xHandler) override;
+ virtual void SAL_CALL setDTDHandler(const css::uno::Reference < XDTDHandler > & xHandler) override;
+ virtual void SAL_CALL setEntityResolver(const css::uno::Reference< XEntityResolver >& xResolver) override;
+
+ virtual void SAL_CALL setLocale( const Locale &locale ) override;
+
+public: // XServiceInfo
+ OUString SAL_CALL getImplementationName() override;
+ css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+ sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
+
+private:
+ std::unique_ptr<SaxExpatParser_Impl> m_pImpl; // sole owner; the constructor also hands a raw, non-owning pointer to LocatorImpl
+};
+
+
+// Entity binds all information needed for a single file; instances are stacked in SaxExpatParser_Impl::vecEntity
+struct Entity
+{
+ InputSource structSource; // input description; LocatorImpl reports its sPublicId and sSystemId
+ XML_Parser pParser; // expat parser handle; LocatorImpl queries it for line, column and byte index
+ sax_expatwrap::XMLFile2UTFConverter converter; // NOTE(review): presumably converts the input stream to UTF-8 - confirm in xml2utf.hxx
+};
+
+
+constexpr OUStringLiteral gsCDATA = u"CDATA";
+
+class SaxExpatParser_Impl // parser state used by SaxExpatParser (owner), LocatorImpl (raw pointer) and the static callbacks declared here
+{
+public: // module scope
+ std::mutex aMutex;
+ bool m_bEnableDoS; // fdo#60471 thank you Adobe Illustrator (initialised to false by the constructor)
+
+ css::uno::Reference< XDocumentHandler > rDocumentHandler;
+ css::uno::Reference< XExtendedDocumentHandler > rExtendedDocumentHandler;
+
+ css::uno::Reference< XErrorHandler > rErrorHandler;
+ css::uno::Reference< XDTDHandler > rDTDHandler;
+ css::uno::Reference< XEntityResolver > rEntityResolver;
+ css::uno::Reference < XLocator > rDocumentLocator; // set to a LocatorImpl by the SaxExpatParser constructor
+
+
+ rtl::Reference < comphelper::AttributeList > rAttrList; // one instance, created in the SaxExpatParser constructor and reused for every startElement callback
+
+ // External entity stack
+ vector<struct Entity> vecEntity;
+ void pushEntity( Entity &&entity )
+ { vecEntity.push_back( std::move(entity) ); }
+ void popEntity()
+ { vecEntity.pop_back( ); } // precondition: stack is not empty
+ struct Entity &getEntity()
+ { return vecEntity.back(); } // top of the stack; precondition: stack is not empty (vector::back on an empty vector is undefined)
+
+
+ // Exception cannot be thrown through the C-XmlParser (possible resource leaks),
+ // therefore the exception must be saved somewhere.
+ SAXParseException exception;
+ css::uno::RuntimeException rtexception; // filled by CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS for non-SAX UNO exceptions
+ bool bExceptionWasThrown; // once true, CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS skips all further handler calls
+ bool bRTExceptionWasThrown; // set together with rtexception
+
+public:
+ SaxExpatParser_Impl()
+ : m_bEnableDoS(false)
+ , bExceptionWasThrown(false)
+ , bRTExceptionWasThrown(false)
+ {
+ }
+
+ // the C-Callbacks for the expat parser (each has an extern "C" call_* trampoline of the same name that forwards to it)
+ void static callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts);
+ void static callbackEndElement(void *userData, const XML_Char *name);
+ void static callbackCharacters( void *userData , const XML_Char *s , int nLen );
+ void static callbackProcessingInstruction( void *userData ,
+ const XML_Char *sTarget ,
+ const XML_Char *sData );
+
+ void static callbackEntityDecl( void *userData ,
+ const XML_Char *entityName,
+ int is_parameter_entity,
+ const XML_Char *value,
+ int value_length,
+ const XML_Char *base,
+ const XML_Char *systemId,
+ const XML_Char *publicId,
+ const XML_Char *notationName);
+
+ void static callbackNotationDecl( void *userData,
+ const XML_Char *notationName,
+ const XML_Char *base,
+ const XML_Char *systemId,
+ const XML_Char *publicId);
+
+ bool static callbackExternalEntityRef( XML_Parser parser, // NOTE(review): returns bool, while the call_callbackExternalEntityRef trampoline returns int
+ const XML_Char *openEntityNames,
+ const XML_Char *base,
+ const XML_Char *systemId,
+ const XML_Char *publicId);
+
+ int static callbackUnknownEncoding(void *encodingHandlerData,
+ const XML_Char *name,
+ XML_Encoding *info);
+
+ void static callbackDefault( void *userData, const XML_Char *s, int len);
+
+ void static callbackStartCDATA( void *userData );
+ void static callbackEndCDATA( void *userData );
+ void static callbackComment( void *userData , const XML_Char *s );
+ void static callErrorHandler( SaxExpatParser_Impl *pImpl , const SAXParseException &e ); // called by CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS for SAX exceptions
+
+public:
+ void parse();
+};
+
+// C-linkage trampolines. Every call_* function forwards its arguments unchanged
+// to the static SaxExpatParser_Impl member of the same name and returns that
+// member's result, if any.
+// NOTE(review): presumably these are the functions registered with expat's
+// XML_Set*Handler calls - the registration is outside this excerpt.
+extern "C"
+{
+    static void call_callbackStartElement(void *pUserData, const XML_Char *pName, const XML_Char **ppAtts)
+    {
+        SaxExpatParser_Impl::callbackStartElement(pUserData, pName, ppAtts);
+    }
+
+    static void call_callbackEndElement(void *pUserData, const XML_Char *pName)
+    {
+        SaxExpatParser_Impl::callbackEndElement(pUserData, pName);
+    }
+
+    static void call_callbackCharacters(void *pUserData, const XML_Char *pChars, int nCount)
+    {
+        SaxExpatParser_Impl::callbackCharacters(pUserData, pChars, nCount);
+    }
+
+    static void call_callbackProcessingInstruction(void *pUserData, const XML_Char *pTarget, const XML_Char *pData)
+    {
+        SaxExpatParser_Impl::callbackProcessingInstruction(pUserData, pTarget, pData);
+    }
+
+    static void call_callbackEntityDecl(void *pUserData, const XML_Char *pEntityName,
+                                        int bIsParameterEntity,
+                                        const XML_Char *pValue, int nValueLength,
+                                        const XML_Char *pBase, const XML_Char *pSystemId,
+                                        const XML_Char *pPublicId, const XML_Char *pNotationName)
+    {
+        SaxExpatParser_Impl::callbackEntityDecl(pUserData, pEntityName, bIsParameterEntity,
+                                                pValue, nValueLength,
+                                                pBase, pSystemId, pPublicId, pNotationName);
+    }
+
+    static void call_callbackNotationDecl(void *pUserData, const XML_Char *pNotationName,
+                                          const XML_Char *pBase, const XML_Char *pSystemId,
+                                          const XML_Char *pPublicId)
+    {
+        SaxExpatParser_Impl::callbackNotationDecl(pUserData, pNotationName, pBase, pSystemId, pPublicId);
+    }
+
+    // The member returns bool; it is converted to the int this trampoline returns.
+    static int call_callbackExternalEntityRef(XML_Parser pExpat, const XML_Char *pOpenEntityNames,
+                                              const XML_Char *pBase, const XML_Char *pSystemId,
+                                              const XML_Char *pPublicId)
+    {
+        return SaxExpatParser_Impl::callbackExternalEntityRef(pExpat, pOpenEntityNames, pBase, pSystemId, pPublicId);
+    }
+
+    static int call_callbackUnknownEncoding(void *pEncodingHandlerData, const XML_Char *pName,
+                                            XML_Encoding *pInfo)
+    {
+        return SaxExpatParser_Impl::callbackUnknownEncoding(pEncodingHandlerData, pName, pInfo);
+    }
+
+    static void call_callbackDefault(void *pUserData, const XML_Char *pChars, int nCount)
+    {
+        SaxExpatParser_Impl::callbackDefault(pUserData, pChars, nCount);
+    }
+
+    static void call_callbackStartCDATA(void *pUserData)
+    {
+        SaxExpatParser_Impl::callbackStartCDATA(pUserData);
+    }
+
+    static void call_callbackEndCDATA(void *pUserData)
+    {
+        SaxExpatParser_Impl::callbackEndCDATA(pUserData);
+    }
+
+    static void call_callbackComment(void *pUserData, const XML_Char *pText)
+    {
+        SaxExpatParser_Impl::callbackComment(pUserData, pText);
+    }
+}
+
+
+// LocatorImpl
+
+class LocatorImpl :
+    public WeakImplHelper< XLocator, css::io::XSeekable >
+    // XSeekable is implemented only to report the byte position; a dedicated
+    // interface for stream positions would be the better choice!
+{
+public:
+    explicit LocatorImpl(SaxExpatParser_Impl *p)
+        : m_pOwner(p)
+    {
+    }
+
+    // XLocator: all values describe the entity the owner is currently parsing
+    virtual sal_Int32 SAL_CALL getColumnNumber() override
+    {
+        return XML_GetCurrentColumnNumber( currentParser() );
+    }
+    virtual sal_Int32 SAL_CALL getLineNumber() override
+    {
+        return XML_GetCurrentLineNumber( currentParser() );
+    }
+    virtual OUString SAL_CALL getPublicId() override
+    {
+        return m_pOwner->getEntity().structSource.sPublicId;
+    }
+    virtual OUString SAL_CALL getSystemId() override
+    {
+        return m_pOwner->getEntity().structSource.sSystemId;
+    }
+
+    // XSeekable: only getPosition() carries information
+
+    virtual void SAL_CALL seek( sal_Int64 ) override
+    {
+        // intentionally a no-op
+    }
+    virtual sal_Int64 SAL_CALL getPosition() override
+    {
+        return XML_GetCurrentByteIndex( currentParser() );
+    }
+    virtual ::sal_Int64 SAL_CALL getLength() override
+    {
+        return 0;
+    }
+
+private:
+    /// expat parser of the entity the owning parser is currently working on
+    XML_Parser currentParser() const
+    {
+        return m_pOwner->getEntity().pParser;
+    }
+
+    SaxExpatParser_Impl *m_pOwner; // not owned
+};
+
+
+SaxExpatParser::SaxExpatParser( )
+{
+    m_pImpl.reset( new SaxExpatParser_Impl );
+
+    // no handler has failed yet
+    m_pImpl->bRTExceptionWasThrown = false;
+    m_pImpl->bExceptionWasThrown = false;
+
+    // Performance: one attribute list object is reused for every
+    // startElement callback, which the sax-specification allows.
+    m_pImpl->rAttrList = new comphelper::AttributeList;
+
+    // the locator reads its positions back from the implementation object
+    rtl::Reference<LocatorImpl> xLocator = new LocatorImpl( m_pImpl.get() );
+    m_pImpl->rDocumentLocator = xLocator;
+}
+
+// css::lang::XInitialization:
+// The only argument evaluated is the first one: the string "DoSmeplease"
+// sets m_bEnableDoS; anything else is ignored.
+void SAL_CALL
+SaxExpatParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
+{
+    if (!rArguments.hasElements())
+        return;
+
+    OUString sFlag;
+    if (!(rArguments[0] >>= sFlag) || !("DoSmeplease" == sFlag))
+        return;
+
+    std::unique_lock guard( m_pImpl->aMutex );
+    m_pImpl->m_bEnableDoS = true;
+}
+
+/**
+ * Scope guard for one parser run: on destruction it pops the current entity
+ * from the given SaxExpatParser_Impl and frees the given expat parser.
+ *
+ * The guard is non-copyable, because a copy would pop a second entity and
+ * free the same XML_Parser twice.
+ */
+class ParserCleanup
+{
+private:
+    SaxExpatParser_Impl& m_rParser;
+    XML_Parser m_xmlParser;
+public:
+    ParserCleanup(SaxExpatParser_Impl& rParser, XML_Parser xmlParser)
+        : m_rParser(rParser)
+        , m_xmlParser(xmlParser)
+    {
+    }
+    ParserCleanup(const ParserCleanup&) = delete;
+    ParserCleanup& operator=(const ParserCleanup&) = delete;
+    ~ParserCleanup()
+    {
+        m_rParser.popEntity();
+        //XML_ParserFree accepts a null arg
+        XML_ParserFree(m_xmlParser);
+    }
+};
+
+/***************
+*
+* parseStream does Parser-startup initializations. The SaxExpatParser_Impl::parse() method does
+* the file-specific initialization work. (During a parser run, external files may be opened)
+*
+* It creates the expat parser for the top-level entity, installs the C callbacks,
+* resets the error state left by an earlier run, reports startDocument/endDocument
+* to the document handler and releases the parser again on every exit path.
+*
+* @param structSource  input source; its aInputStream must be set
+* @throws SAXException if no input stream is given or the expat parser cannot be
+*         created; whatever SaxExpatParser_Impl::parse() or the handlers throw
+*         is passed on
+*
+****************/
+void SaxExpatParser::parseStream( const InputSource& structSource)
+{
+    // Only one document at a time
+    std::unique_lock guard( m_pImpl->aMutex );
+
+
+    struct Entity entity;
+    entity.structSource = structSource;
+
+    if( ! entity.structSource.aInputStream.is() )
+    {
+        throw SAXException("No input source",
+                           css::uno::Reference< css::uno::XInterface > () , css::uno::Any() );
+    }
+
+    entity.converter.setInputStream( entity.structSource.aInputStream );
+    if( !entity.structSource.sEncoding.isEmpty() )
+    {
+        entity.converter.setEncoding(
+            OUStringToOString( entity.structSource.sEncoding , RTL_TEXTENCODING_ASCII_US ) );
+    }
+
+    // create parser with proper encoding
+    entity.pParser = XML_ParserCreate( nullptr );
+    if( ! entity.pParser )
+    {
+        throw SAXException("Couldn't create parser",
+                           css::uno::Reference< css::uno::XInterface > (), css::uno::Any() );
+    }
+
+    // set all necessary C-Callbacks
+    XML_SetUserData( entity.pParser, m_pImpl.get() );
+    XML_SetElementHandler( entity.pParser ,
+                           call_callbackStartElement ,
+                           call_callbackEndElement );
+    XML_SetCharacterDataHandler( entity.pParser , call_callbackCharacters );
+    XML_SetProcessingInstructionHandler(entity.pParser ,
+                                        call_callbackProcessingInstruction );
+    if (!m_pImpl->m_bEnableDoS)
+    {
+        // the entity-declaration callback stops the parser on internal entities
+        XML_SetEntityDeclHandler(entity.pParser, call_callbackEntityDecl);
+    }
+    XML_SetNotationDeclHandler( entity.pParser, call_callbackNotationDecl );
+    XML_SetExternalEntityRefHandler( entity.pParser,
+                                     call_callbackExternalEntityRef);
+    XML_SetUnknownEncodingHandler( entity.pParser, call_callbackUnknownEncoding ,nullptr);
+
+    if( m_pImpl->rExtendedDocumentHandler.is() ) {
+
+        // These handlers just delegate calls to the ExtendedHandler. If no extended handler is
+        // given, these callbacks can be ignored
+        XML_SetDefaultHandlerExpand( entity.pParser, call_callbackDefault );
+        XML_SetCommentHandler( entity.pParser, call_callbackComment );
+        XML_SetCdataSectionHandler( entity.pParser ,
+                                    call_callbackStartCDATA ,
+                                    call_callbackEndCDATA );
+    }
+
+
+    // Start from a clean error state. Without resetting the flags, a failure
+    // recorded by an earlier run on this instance would make parse() give up
+    // after the first chunk of every later document.
+    m_pImpl->exception = SAXParseException();
+    m_pImpl->bExceptionWasThrown = false;
+    m_pImpl->bRTExceptionWasThrown = false;
+
+    auto const xmlParser = entity.pParser;
+    m_pImpl->pushEntity( std::move(entity) );
+
+    // pops the entity and frees the parser, also when an exception escapes
+    ParserCleanup aEnsureFree(*m_pImpl, xmlParser);
+
+    // start the document
+    if( m_pImpl->rDocumentHandler.is() ) {
+        m_pImpl->rDocumentHandler->setDocumentLocator( m_pImpl->rDocumentLocator );
+        m_pImpl->rDocumentHandler->startDocument();
+    }
+
+    m_pImpl->parse();
+
+    // finish document
+    if( m_pImpl->rDocumentHandler.is() ) {
+        m_pImpl->rDocumentHandler->endDocument();
+    }
+}
+
+// Stores the document handler and, if the same object also implements
+// XExtendedDocumentHandler, that interface as well (otherwise the extended
+// reference ends up empty).
+void SaxExpatParser::setDocumentHandler(const css::uno::Reference< XDocumentHandler > & xHandler)
+{
+    m_pImpl->rDocumentHandler = xHandler;
+    m_pImpl->rExtendedDocumentHandler.set( xHandler , css::uno::UNO_QUERY );
+}
+
+void SaxExpatParser::setErrorHandler(const css::uno::Reference< XErrorHandler > & xHandler) // handler used by parse() (fatalError) and callErrorHandler() (error)
+{
+ m_pImpl->rErrorHandler = xHandler;
+}
+
+void SaxExpatParser::setDTDHandler(const css::uno::Reference< XDTDHandler > & xHandler) // receives unparsedEntityDecl and notationDecl
+{
+ m_pImpl->rDTDHandler = xHandler;
+}
+
+void SaxExpatParser::setEntityResolver(const css::uno::Reference < XEntityResolver > & xResolver) // consulted by callbackExternalEntityRef
+{
+ m_pImpl->rEntityResolver = xResolver;
+}
+
+
+void SaxExpatParser::setLocale( const Locale & ) // the locale argument is ignored
+{
+ // not implemented
+}
+
+// XServiceInfo: fixed implementation name of this component
+OUString SaxExpatParser::getImplementationName()
+{
+ return "com.sun.star.comp.extensions.xml.sax.ParserExpat";
+}
+
+// XServiceInfo: delegates the lookup to cppu::supportsService
+sal_Bool SaxExpatParser::supportsService(const OUString& ServiceName)
+{
+ return cppu::supportsService(this, ServiceName);
+}
+
+// XServiceInfo: the single supported service name
+css::uno::Sequence< OUString > SaxExpatParser::getSupportedServiceNames()
+{
+ return { "com.sun.star.xml.sax.Parser" };
+}
+
+
+/*---------------------------------------
+*
+* Helper functions and classes
+*
+*
+*-------------------------------------------*/
+/**
+ * Builds the text of a parse error in the form
+ * "[<system id> line <n>]: <description> error".
+ *
+ * @param xmlE       expat error code; codes without a description below
+ *                   yield just "error"
+ * @param sSystemId  system id of the entity being parsed
+ * @param nLine      line number reported by the locator
+ * @return the assembled message
+ */
+OUString getErrorMessage( XML_Error xmlE, std::u16string_view sSystemId , sal_Int32 nLine )
+{
+    OUString Message;
+    switch( xmlE ) {
+        case XML_ERROR_NONE:
+            Message = "No";
+            break;
+        case XML_ERROR_NO_MEMORY:
+            Message = "no memory";
+            break;
+        case XML_ERROR_SYNTAX:
+            Message = "syntax";
+            break;
+        case XML_ERROR_NO_ELEMENTS:
+            Message = "no elements";
+            break;
+        case XML_ERROR_INVALID_TOKEN:
+            Message = "invalid token";
+            break;
+        case XML_ERROR_UNCLOSED_TOKEN:
+            Message = "unclosed token";
+            break;
+        case XML_ERROR_PARTIAL_CHAR:
+            Message = "partial char";
+            break;
+        case XML_ERROR_TAG_MISMATCH:
+            Message = "tag mismatch";
+            break;
+        case XML_ERROR_DUPLICATE_ATTRIBUTE:
+            Message = "duplicate attribute";
+            break;
+        case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
+            Message = "junk after doc element";
+            break;
+        case XML_ERROR_PARAM_ENTITY_REF:
+            Message = "parameter entity reference";
+            break;
+        case XML_ERROR_UNDEFINED_ENTITY:
+            Message = "undefined entity";
+            break;
+        case XML_ERROR_RECURSIVE_ENTITY_REF:
+            Message = "recursive entity reference";
+            break;
+        case XML_ERROR_ASYNC_ENTITY:
+            Message = "async entity";
+            break;
+        case XML_ERROR_BAD_CHAR_REF:
+            Message = "bad char reference";
+            break;
+        case XML_ERROR_BINARY_ENTITY_REF:
+            Message = "binary entity reference";
+            break;
+        case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
+            Message = "attribute external entity reference";
+            break;
+        case XML_ERROR_MISPLACED_XML_PI:
+            Message = "misplaced xml processing instruction";
+            break;
+        case XML_ERROR_UNKNOWN_ENCODING:
+            Message = "unknown encoding";
+            break;
+        case XML_ERROR_INCORRECT_ENCODING:
+            Message = "incorrect encoding";
+            break;
+        case XML_ERROR_UNCLOSED_CDATA_SECTION:
+            Message = "unclosed cdata section";
+            break;
+        case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
+            Message = "external entity reference";
+            break;
+        case XML_ERROR_NOT_STANDALONE:
+            Message = "not standalone";
+            break;
+        default:
+            // no description for this code
+            break;
+    }
+
+    // Separate the description from the trailing word; previously the two
+    // were glued together ("syntaxerror", "Noerror").
+    if( !Message.isEmpty() )
+        Message += " ";
+
+    OUString str = OUString::Concat("[") +
+            sSystemId +
+            " line " +
+            OUString::number( nLine ) +
+            "]: " +
+            Message +
+            "error";
+
+    return str;
+}
+
+
+// Feeds the current entity to its expat parser in chunks of 16 KiB.
+// Returns once the final (empty) chunk has been accepted. If expat reports an
+// error or a handler has recorded an exception, either the stored runtime
+// exception or a SAXParseException is thrown (after fatalError() was called
+// on the error handler, if one is set).
+void SaxExpatParser_Impl::parse( )
+{
+    const int nChunkSize = 16*1024;
+    css::uno::Sequence< sal_Int8 > aChunk(nChunkSize);
+
+    for (;;) {
+        const int nBytes = getEntity().converter.readAndConvert( aChunk , nChunkSize );
+        const bool bLastChunk = ( nBytes == 0 );
+
+        XML_Status const eStatus = XML_Parse( getEntity().pParser,
+                                               reinterpret_cast<const char *>(aChunk.getConstArray()),
+                                               nBytes,
+                                               bLastChunk ? 1 : 0 );
+
+        if( bLastChunk ) {
+            // last call - must return OK
+            if (eStatus == XML_STATUS_OK)
+                return;
+        }
+        else if( eStatus != XML_STATUS_ERROR && !bExceptionWasThrown ) {
+            // chunk accepted and no handler complained: read on
+            continue;
+        }
+
+        // ---- failure ----
+        if ( bRTExceptionWasThrown )
+            throw rtexception;
+
+        // Error during parsing !
+        XML_Error xmlE = XML_GetErrorCode( getEntity().pParser );
+        OUString sSystemId = rDocumentLocator->getSystemId();
+        sal_Int32 nLine = rDocumentLocator->getLineNumber();
+
+        SAXParseException aExcept(
+            getErrorMessage(xmlE , sSystemId, nLine) ,
+            css::uno::Reference< css::uno::XInterface >(),
+            css::uno::Any( &exception , cppu::UnoType<decltype(exception)>::get() ),
+            rDocumentLocator->getPublicId(),
+            rDocumentLocator->getSystemId(),
+            rDocumentLocator->getLineNumber(),
+            rDocumentLocator->getColumnNumber()
+            );
+
+        if( rErrorHandler.is() ) {
+            // give the error handler the chance to throw its own exception
+            css::uno::Any a;
+            a <<= aExcept;
+            rErrorHandler->fatalError( a );
+        }
+
+        // The handler did not throw, but parsing cannot go on,
+        // so an exception MUST be thrown.
+        throw aExcept;
+    }
+}
+
+
+// The C-Callbacks
+
+
+// Start-tag callback: copies the null-terminated name/value array into the
+// shared attribute list and reports the element to the document handler.
+void SaxExpatParser_Impl::callbackStartElement( void *pvThis ,
+                                                const XML_Char *pwName ,
+                                                const XML_Char **awAttributes )
+{
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+    if( !pImpl->rDocumentHandler.is() )
+        return;
+
+    pImpl->rAttrList->Clear();
+
+    // entries come in pairs: [name, value, name, value, ..., nullptr]
+    for( const XML_Char **ppPair = awAttributes; ppPair[0]; ppPair += 2 ) {
+        assert(ppPair[1]);
+        pImpl->rAttrList->AddAttribute(
+            XML_CHAR_TO_OUSTRING( ppPair[0] ) ,
+            gsCDATA,  // expat doesn't know types
+            XML_CHAR_TO_OUSTRING( ppPair[1] ) );
+    }
+
+    CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(
+        pImpl ,
+        rDocumentHandler->startElement( XML_CHAR_TO_OUSTRING( pwName ) ,
+                                        pImpl->rAttrList ) );
+}
+
+// End-tag callback: reports the element name to the document handler, if any.
+void SaxExpatParser_Impl::callbackEndElement( void *pvThis , const XML_Char *pwName  )
+{
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+    if( !pImpl->rDocumentHandler.is() )
+        return;
+
+    CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl,
+        rDocumentHandler->endElement( XML_CHAR_TO_OUSTRING( pwName ) ) );
+}
+
+
+// Character-data callback: hands the nLen characters at s to the document
+// handler, if any.
+void SaxExpatParser_Impl::callbackCharacters( void *pvThis , const XML_Char *s , int nLen )
+{
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+    if( !pImpl->rDocumentHandler.is() )
+        return;
+
+    CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl ,
+        rDocumentHandler->characters( XML_CHAR_N_TO_USTRING(s,nLen) ) );
+}
+
+// Processing-instruction callback: passes target and data on to the document
+// handler, if any.
+void SaxExpatParser_Impl::callbackProcessingInstruction( void *pvThis,
+                                                         const XML_Char *sTarget ,
+                                                         const XML_Char *sData )
+{
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+    if( !pImpl->rDocumentHandler.is() )
+        return;
+
+    CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(
+        pImpl ,
+        rDocumentHandler->processingInstruction( XML_CHAR_TO_OUSTRING( sTarget ),
+                                                 XML_CHAR_TO_OUSTRING( sData ) ) );
+}
+
+
+// Entity-declaration callback. An internal entity (value != nullptr) stops
+// the parser and records a SAXParseException; any other declaration is
+// reported to the DTD handler as an unparsed entity.
+void SaxExpatParser_Impl::callbackEntityDecl(
+    void *pvThis, const XML_Char *entityName,
+    SAL_UNUSED_PARAMETER int /*is_parameter_entity*/,
+    const XML_Char *value, SAL_UNUSED_PARAMETER int /*value_length*/,
+    SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId,
+    const XML_Char *publicId, const XML_Char *notationName)
+{
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+    if (!value) {
+        // not an internal entity
+        if( pImpl->rDTDHandler.is() ) {
+            CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(
+                pImpl ,
+                rDTDHandler->unparsedEntityDecl(
+                    XML_CHAR_TO_OUSTRING( entityName ),
+                    XML_CHAR_TO_OUSTRING( publicId ) ,
+                    XML_CHAR_TO_OUSTRING( systemId ) ,
+                    XML_CHAR_TO_OUSTRING( notationName ) ) );
+        }
+        return;
+    }
+
+    // value != 0 means internal entity
+    SAL_INFO("sax","SaxExpatParser: internal entity declaration, stopping");
+    XML_StopParser(pImpl->getEntity().pParser, XML_FALSE);
+    pImpl->exception = SAXParseException(
+        "SaxExpatParser: internal entity declaration, stopping",
+        nullptr, css::uno::Any(),
+        pImpl->rDocumentLocator->getPublicId(),
+        pImpl->rDocumentLocator->getSystemId(),
+        pImpl->rDocumentLocator->getLineNumber(),
+        pImpl->rDocumentLocator->getColumnNumber() );
+    pImpl->bExceptionWasThrown = true;
+}
+
+// Notation-declaration callback: forwards name, public id and system id to
+// the DTD handler, if any.
+void SaxExpatParser_Impl::callbackNotationDecl(
+    void *pvThis, const XML_Char *notationName,
+    SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId,
+    const XML_Char *publicId)
+{
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+    if( !pImpl->rDTDHandler.is() )
+        return;
+
+    CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl,
+        rDTDHandler->notationDecl( XML_CHAR_TO_OUSTRING( notationName ) ,
+                                   XML_CHAR_TO_OUSTRING( publicId ) ,
+                                   XML_CHAR_TO_OUSTRING( systemId ) ) );
+}
+
+
+/**
+ * External-entity callback. Asks the entity resolver (if set) for an input
+ * source; when the source has an input stream, a child expat parser is
+ * created and the entity is parsed with it before the callback returns.
+ *
+ * @return false if the resolver threw, the child parser could not be
+ *         created, or parsing the entity failed with one of the exceptions
+ *         caught below; true otherwise (also when nothing was resolved)
+ */
+bool SaxExpatParser_Impl::callbackExternalEntityRef(
+    XML_Parser parser, const XML_Char *context,
+    SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId,
+    const XML_Char *publicId)
+{
+    bool bOK = true;
+    SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(XML_GetUserData( parser ));
+
+    struct Entity entity;
+
+    if( pImpl->rEntityResolver.is() ) {
+        try
+        {
+            entity.structSource = pImpl->rEntityResolver->resolveEntity(
+                XML_CHAR_TO_OUSTRING( publicId ) ,
+                XML_CHAR_TO_OUSTRING( systemId ) );
+        }
+        catch( const SAXParseException & e )
+        {
+            pImpl->exception = e;
+            bOK = false;
+        }
+        catch( const SAXException & e )
+        {
+            // add the current position to a plain SAXException
+            pImpl->exception = SAXParseException(
+                e.Message , e.Context , e.WrappedException ,
+                pImpl->rDocumentLocator->getPublicId(),
+                pImpl->rDocumentLocator->getSystemId(),
+                pImpl->rDocumentLocator->getLineNumber(),
+                pImpl->rDocumentLocator->getColumnNumber() );
+            bOK = false;
+        }
+    }
+
+    if( entity.structSource.aInputStream.is() ) {
+        entity.pParser = XML_ExternalEntityParserCreate( parser , context, nullptr );
+        if( ! entity.pParser )
+        {
+            return false;
+        }
+
+        entity.converter.setInputStream( entity.structSource.aInputStream );
+        auto const xmlParser = entity.pParser;
+        pImpl->pushEntity( std::move(entity) );
+
+        // Pops the entity and frees the child parser when this scope is left,
+        // including through an exception type that is not caught below
+        // (the manual cleanup used before was skipped in that case).
+        ParserCleanup aEnsureFree(*pImpl, xmlParser);
+        try
+        {
+            pImpl->parse();
+        }
+        catch( const SAXParseException & e )
+        {
+            pImpl->exception = e;
+            bOK = false;
+        }
+        catch( const IOException &e )
+        {
+            pImpl->exception.WrappedException <<= e;
+            bOK = false;
+        }
+        catch( const css::uno::RuntimeException &e )
+        {
+            pImpl->exception.WrappedException <<=e;
+            bOK = false;
+        }
+    }
+
+    return bOK;
+}
+
+int SaxExpatParser_Impl::callbackUnknownEncoding( // ignores every argument
+ SAL_UNUSED_PARAMETER void * /*encodingHandlerData*/,
+ SAL_UNUSED_PARAMETER const XML_Char * /*name*/,
+ SAL_UNUSED_PARAMETER XML_Encoding * /*info*/)
+{
+ return 0; // always 0; presumably tells expat the encoding is not handled - confirm against the expat API
+}
+
+void SaxExpatParser_Impl::callbackDefault( void *pvThis, const XML_Char *s, int len) // passes the len characters at s to XExtendedDocumentHandler::unknown
+{
+ SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+ CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, // no is() check here; parseStream installs this callback only when the extended handler is set
+ rExtendedDocumentHandler->unknown( XML_CHAR_N_TO_USTRING( s ,len) ) );
+}
+
+void SaxExpatParser_Impl::callbackComment( void *pvThis , const XML_Char *s ) // passes the comment text to XExtendedDocumentHandler::comment
+{
+ SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+ CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, // no is() check here; parseStream installs this callback only when the extended handler is set
+ rExtendedDocumentHandler->comment( XML_CHAR_TO_OUSTRING( s ) ) );
+}
+
+void SaxExpatParser_Impl::callbackStartCDATA( void *pvThis ) // reports the start of a CDATA section to the extended document handler
+{
+ SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+ CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, rExtendedDocumentHandler->startCDATA() ); // no is() check; installed by parseStream only when the extended handler is set
+}
+
+
+void SaxExpatParser_Impl::callErrorHandler( SaxExpatParser_Impl *pImpl ,
+ const SAXParseException & e )
+{
+ try
+ {
+ if( pImpl->rErrorHandler.is() ) {
+ css::uno::Any a;
+ a <<= e;
+ pImpl->rErrorHandler->error( a );
+ }
+ else {
+ pImpl->exception = e;
+ pImpl->bExceptionWasThrown = true;
+ }
+ }
+ catch( const SAXParseException & ex ) {
+ pImpl->exception = ex;
+ pImpl->bExceptionWasThrown = true;
+ }
+ catch( const SAXException & ex ) {
+ pImpl->exception = SAXParseException(
+ ex.Message,
+ ex.Context,
+ ex.WrappedException,
+ pImpl->rDocumentLocator->getPublicId(),
+ pImpl->rDocumentLocator->getSystemId(),
+ pImpl->rDocumentLocator->getLineNumber(),
+ pImpl->rDocumentLocator->getColumnNumber()
+ );
+ pImpl->bExceptionWasThrown = true;
+ }
+}
+
+void SaxExpatParser_Impl::callbackEndCDATA( void *pvThis ) // reports the end of a CDATA section to the extended document handler
+{
+ SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis);
+
+ CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pImpl,rExtendedDocumentHandler->endCDATA() ); // no is() check; installed by parseStream only when the extended handler is set
+}
+
+} // namespace
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * // exported factory entry point for the ParserExpat implementation
+com_sun_star_comp_extensions_xml_sax_ParserExpat_get_implementation(
+ css::uno::XComponentContext *, // unused
+ css::uno::Sequence<css::uno::Any> const &) // unused
+{
+ return cppu::acquire(new SaxExpatParser); // new instance, passed through cppu::acquire
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx
new file mode 100644
index 000000000..e19a31211
--- /dev/null
+++ b/sax/source/expatwrap/saxwriter.cxx
@@ -0,0 +1,1486 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <string.h>
+
+#include <cassert>
+#include <set>
+#include <stack>
+#include <vector>
+
+#include <com/sun/star/io/IOException.hpp>
+#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/util/XCloneable.hpp>
+#include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
+#include <com/sun/star/xml/sax/XWriter.hpp>
+
+#include <cppuhelper/exc_hlp.hxx>
+#include <cppuhelper/weak.hxx>
+#include <cppuhelper/implbase.hxx>
+#include <cppuhelper/supportsservice.hxx>
+
+#include <osl/diagnose.h>
+#include <rtl/character.hxx>
+#include <sal/log.hxx>
+
+using namespace ::std;
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::xml::sax;
+using namespace ::com::sun::star::util;
+using namespace ::com::sun::star::io;
+
+#include <memory>
+
+#define LINEFEED 10
+#define SEQUENCESIZE 1024
+#define MAXCOLUMNCOUNT 72
+
+/******
+*
+*
+* Character conversion functions
+*
+*
+*****/
+
+namespace
+{
+enum SaxInvalidCharacterError // result type of SaxWriterHelper::startElement
+{
+ SAX_NONE, // presumably: no invalid character found - confirm in startElement
+ SAX_WARNING,
+ SAX_ERROR
+};
+
+// Custom entity names: on a match convertToXML emits `name` and skips replacement.getLength() source characters
+struct ReplacementPair
+{
+ OUString name; // text written to the output (converted to UTF-8)
+ OUString replacement; // source text this pair stands for
+};
+inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs) // orders pairs by replacement text only; used by std::sort in setCustomEntityNames
+{
+ return lhs.replacement.compareTo(rhs.replacement) < 0;
+}
+
+class SaxWriterHelper
+{
+#ifdef DBG_UTIL
+public:
+ ::std::stack<OUString> m_DebugStartedElements;
+#endif
+
+private:
+ Reference<XOutputStream> m_out; // stream that writeSequence() writes to
+ Sequence<sal_Int8> m_Sequence; // output cache, created with SEQUENCESIZE bytes
+ sal_Int8* mp_Sequence; // m_Sequence.getArray(), set in the constructor
+
+ sal_Int32 nLastLineFeedPos; // is negative after writing a sequence
+ sal_uInt32 nCurrentPos; // index in the cache at which the next byte is stored
+ bool m_bStartElementFinished; // false while the closing '>' of a start tag is still pending
+
+ std::vector<ReplacementPair> m_Replacements; // filled and sorted by setCustomEntityNames()
+
+ /// @throws SAXException
+ sal_uInt32 writeSequence();
+
+ // use only when the bytes do not fit into the remaining space of the sequence, so that
+ // the sequence has to be written out and rPos reset to 0;
+ // writes the sequence only on overflow, so it may be full at the end (rPos == SEQUENCESIZE)
+ /// @throws SAXException
+ void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes,
+ sal_uInt32 nBytesCount);
+ /// @throws SAXException
+ bool convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, bool bDoNormalization,
+ bool bNormalizeWhitespace, sal_Int8* pTarget, sal_uInt32& rPos);
+ /// @throws SAXException
+ void FinishStartElement();
+
+ // Search for the correct replacement
+ const ReplacementPair* findXMLReplacement(const sal_Unicode* pStr, sal_Int32 nStrLen);
+
+public:
+ explicit SaxWriterHelper(Reference<XOutputStream> const& m_TempOut)
+ : m_out(m_TempOut)
+ , m_Sequence(SEQUENCESIZE)
+ , mp_Sequence(nullptr)
+ , nLastLineFeedPos(0)
+ , nCurrentPos(0)
+ , m_bStartElementFinished(true)
+ {
+ OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size too small");
+ mp_Sequence = m_Sequence.getArray();
+ }
+ ~SaxWriterHelper()
+ {
+ OSL_ENSURE(!nCurrentPos, "cached Sequence not written");
+ OSL_ENSURE(m_bStartElementFinished, "StartElement not completely written");
+ }
+
+ /// @throws SAXException
+ void insertIndentation(sal_uInt32 m_nLevel);
+
+ // returns whether it worked correctly or invalid characters were in the string
+ // If there are invalid characters in the string it returns sal_False.
+ // Then the calling method has to throw the needed Exception.
+ /// @throws SAXException
+ bool writeString(const OUString& rWriteOutString, bool bDoNormalization,
+ bool bNormalizeWhitespace);
+
+ sal_uInt32 GetLastColumnCount() const noexcept
+ {
+ return static_cast<sal_uInt32>(nCurrentPos - nLastLineFeedPos);
+ }
+
+ /// @throws SAXException
+ void startDocument();
+
+ // reports whether it worked correctly or invalid characters were in the strings
+ // through the returned SaxInvalidCharacterError (presumably SAX_NONE when all was valid).
+ // Then the calling method has to throw the needed Exception.
+ /// @throws SAXException
+ SaxInvalidCharacterError startElement(const OUString& rName,
+ const Reference<XAttributeList>& xAttribs);
+ /// @throws SAXException
+ bool FinishEmptyElement();
+
+ // returns whether it worked correctly or invalid characters were in the string
+ // If there are invalid characters in the string it returns sal_False.
+ // Then the calling method has to throw the needed Exception.
+ /// @throws SAXException
+ bool endElement(const OUString& rName);
+ /// @throws SAXException
+ void endDocument();
+
+ // returns whether it worked correctly or invalid characters were in the strings
+ // If there are invalid characters in the string it returns sal_False.
+ // Then the calling method has to throw the needed Exception.
+ /// @throws SAXException
+ bool processingInstruction(const OUString& rTarget, const OUString& rData);
+ /// @throws SAXException
+ void startCDATA();
+ /// @throws SAXException
+ void endCDATA();
+
+ // returns whether it worked correctly or invalid characters were in the strings
+ // If there are invalid characters in the string it returns sal_False.
+ // Then the calling method has to throw the needed Exception.
+ /// @throws SAXException
+ bool comment(const OUString& rComment);
+
+ /// @throws SAXException
+ void clearBuffer();
+
+ // Use custom entity names
+ void setCustomEntityNames(
+ const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>&
+ replacements);
+
+ // Calculate length for convertToXML
+ sal_Int32 calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
+ bool bNormalizeWhitespace);
+};
+
+const bool g_bValidCharsBelow32[32] = { // indexed by code unit; consulted by IsInvalidChar for values below 32
+ // clang-format off
+// 0 1 2 3 4 5 6 7
+ false, false, false, false, false, false, false, false, //0
+ false, true, true, false, false, true, false, false, //8 (only 9, 10 and 13 are true)
+ false, false, false, false, false, false, false, false, //16
+ false, false, false, false, false, false, false, false
+ // clang-format on
+};
+
+// Returns true for code units that are rejected on output: everything below
+// 32 that g_bValidCharsBelow32 does not allow, plus 0xfffe and 0xffff.
+bool IsInvalidChar(const sal_Unicode aChar)
+{
+    if (aChar < 32)
+        return !g_bValidCharsBelow32[aChar];
+    return aChar == 0xffff || aChar == 0xfffe;
+}
+
+/********
+* write the whole cache sequence through to the output stream; always returns 0,
+* which the callers assign as the new write position
+*****/
+sal_uInt32 SaxWriterHelper::writeSequence()
+{
+ try
+ {
+ m_out->writeBytes(m_Sequence); // the complete sequence is written, not just a filled prefix
+ }
+ catch (const IOException&)
+ {
+ css::uno::Any anyEx = cppu::getCaughtException();
+ throw SAXException("IO exception during writing", Reference<XInterface>(), anyEx); // the IOException travels along as wrapped exception
+ }
+ nLastLineFeedPos -= SEQUENCESIZE; // keep the line feed position relative to the restarted buffer
+ return 0;
+}
+
+// Appends nBytesCount bytes that do not fit into the space left in pTarget:
+// the buffer is topped up, written out and restarted at 0 as often as needed,
+// then the remainder is copied. rPos may end up equal to SEQUENCESIZE.
+void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes,
+                               sal_uInt32 nBytesCount)
+{
+    const sal_Int8* pSource = pBytes;
+    sal_uInt32 nPending = nBytesCount;
+
+    do
+    {
+        OSL_ENSURE((rPos + nPending) > SEQUENCESIZE, "wrong use of AddBytesMethod");
+
+        // fill the buffer up to its end ...
+        const sal_uInt32 nFree(SEQUENCESIZE - rPos);
+        memcpy(&(pTarget[rPos]), pSource, nFree);
+
+        OSL_ENSURE(rPos + nFree == SEQUENCESIZE, "the position should be the at the end");
+
+        // ... and flush it
+        rPos = writeSequence();
+        pSource += nFree;
+        nPending -= nFree;
+    } while ((rPos + nPending) > SEQUENCESIZE);
+
+    memcpy(&(pTarget[rPos]), pSource, nPending);
+    rPos += nPending;
+}
+
+// Replaces the custom entity table: First of each pair becomes the name,
+// Second the replacement text; the table is kept sorted by replacement text.
+void SaxWriterHelper::setCustomEntityNames(
+    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
+{
+    const size_t nPairs = replacements.size();
+    m_Replacements.resize(nPairs);
+
+    for (size_t nIdx = 0; nIdx != nPairs; ++nIdx)
+    {
+        ReplacementPair& rTarget = m_Replacements[nIdx];
+        rTarget.name = replacements[nIdx].First;
+        rTarget.replacement = replacements[nIdx].Second;
+    }
+
+    // nothing to order with fewer than two entries
+    if (nPairs > 1)
+        std::sort(m_Replacements.begin(), m_Replacements.end());
+}
+
+/** Converts a UTF-16 string to UTF-8 and does XML normalization
+
+    The bytes are appended to pTarget at rPos. Whenever the buffer is full
+    (rPos == SEQUENCESIZE) it is written out via writeSequence() and rPos
+    restarts at 0, so after every processed character rPos is below
+    SEQUENCESIZE again.
+
+    @param pStr
+        the UTF-16 code units to convert
+    @param nStrLen
+        number of code units in pStr
+    @param bDoNormalization
+        if true, & < > ' " and CR are written as entities / character
+        references and the custom replacements are applied
+    @param bNormalizeWhitespace
+        if true (and bDoNormalization is set), LF and TAB are written as
+        character references as well
+    @param pTarget
+        Pointer to the output buffer; expected to be the SEQUENCESIZE-byte
+        cache that writeSequence() flushes.
+    @param rPos
+        write position inside pTarget, updated on return
+    @return
+        false if the string contained an invalid character or an unpaired
+        surrogate (such code units are not written, conversion goes on)
+ */
+bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
+                                   bool bDoNormalization, bool bNormalizeWhitespace,
+                                   sal_Int8* pTarget, sal_uInt32& rPos)
+{
+    bool bRet(true);
+    sal_uInt32 nSurrogate = 0;
+
+    // Appends nCount bytes; AddBytes() takes over (and flushes) when they do
+    // not fit into the rest of the buffer.
+    const auto appendBytes = [&](const void* pBytes, sal_uInt32 nCount) {
+        if ((rPos + nCount) > SEQUENCESIZE)
+            AddBytes(pTarget, rPos, static_cast<sal_Int8 const*>(pBytes), nCount);
+        else
+        {
+            memcpy(&(pTarget[rPos]), pBytes, nCount);
+            rPos += nCount;
+        }
+    };
+
+    for (sal_Int32 i = 0; i < nStrLen; i++)
+    {
+        sal_Unicode c = pStr[i];
+        if (IsInvalidChar(c))
+            bRet = false;
+        else if ((c >= 0x0001) && (c <= 0x007F)) // Deal with ascii
+        {
+            // entity / character reference to write instead of c, if any
+            const char* pEntity = nullptr;
+            if (bDoNormalization)
+            {
+                switch (c)
+                {
+                    case '&':
+                        pEntity = "&amp;";
+                        break;
+                    case '<':
+                        pEntity = "&lt;";
+                        break;
+                    case '>':
+                        pEntity = "&gt;";
+                        break;
+                    case '\'':
+                        pEntity = "&apos;";
+                        break;
+                    case '"':
+                        pEntity = "&quot;";
+                        break;
+                    case 13:
+                        pEntity = "&#x0d;";
+                        break;
+                    case LINEFEED:
+                        if (bNormalizeWhitespace)
+                            pEntity = "&#x0a;";
+                        break;
+                    case 9:
+                        if (bNormalizeWhitespace)
+                            pEntity = "&#x09;";
+                        break;
+                    default:
+                        break;
+                }
+            }
+
+            if (pEntity != nullptr)
+                appendBytes(pEntity, static_cast<sal_uInt32>(strlen(pEntity)));
+            else
+            {
+                // A single byte always fits: the buffer is flushed as soon as
+                // it becomes full.
+                if (c == LINEFEED)
+                    nLastLineFeedPos = rPos;
+                pTarget[rPos] = static_cast<sal_Int8>(c);
+                rPos++;
+            }
+        }
+        else
+        {
+            // Deal with replacements
+            if (bDoNormalization && !m_Replacements.empty())
+            {
+                // search
+                const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i);
+
+                // replace
+                if (it != nullptr)
+                {
+                    OString name = ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8);
+                    appendBytes(name.getStr(), static_cast<sal_uInt32>(name.getLength()));
+                    i += it->replacement.getLength() - 1;
+
+                    // The name may have filled the buffer exactly. Flush here,
+                    // because the `continue` skips the flush at the end of the
+                    // loop body and the next single-byte store would otherwise
+                    // land one past the end of the buffer.
+                    if (rPos == SEQUENCESIZE)
+                        rPos = writeSequence();
+                    continue;
+                }
+            }
+
+            // Deal with other unicode cases
+            if (rtl::isHighSurrogate(c))
+            {
+                // 1. surrogate: save (until 2. surrogate)
+                if (nSurrogate != 0) // left-over lone 1st Unicode surrogate
+                {
+                    OSL_FAIL("left-over Unicode surrogate");
+                    bRet = false;
+                }
+                nSurrogate = c;
+            }
+            else if (rtl::isLowSurrogate(c))
+            {
+                // 2. surrogate: write as UTF-8
+                if (nSurrogate) // can only be 1st surrogate
+                {
+                    nSurrogate = rtl::combineSurrogates(nSurrogate, c);
+                    sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
+                                          sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
+                                          sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
+                                          sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) };
+                    appendBytes(aBytes, 4);
+                }
+                else // lone 2nd surrogate
+                {
+                    OSL_FAIL("illegal Unicode character");
+                    bRet = false;
+                }
+
+                // reset surrogate
+                nSurrogate = 0;
+            }
+            else if (c > 0x07FF)
+            {
+                // three-byte UTF-8 sequence
+                sal_Int8 aBytes[]
+                    = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)), sal_Int8(0x80 | ((c >> 6) & 0x3F)),
+                        sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
+                appendBytes(aBytes, 3);
+            }
+            else
+            {
+                // two-byte UTF-8 sequence
+                sal_Int8 aBytes[]
+                    = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)), sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
+                appendBytes(aBytes, 2);
+            }
+        }
+
+        OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position");
+        if (rPos == SEQUENCESIZE)
+            rPos = writeSequence();
+
+        // reset left-over surrogate
+        if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
+        {
+            OSL_FAIL("left-over Unicode surrogate");
+            nSurrogate = 0;
+            bRet = false;
+        }
+    }
+    if (nSurrogate != 0) // trailing lone 1st surrogate
+    {
+        OSL_FAIL("left-over Unicode surrogate");
+        bRet = false;
+    }
+    return bRet;
+}
+
+/**
+ * Closes a start tag that is still open by appending its '>'.
+ *
+ * Does nothing when the start tag has already been finished. The buffer is
+ * flushed through writeSequence() as soon as it becomes full.
+ */
+void SaxWriterHelper::FinishStartElement()
+{
+    if (m_bStartElementFinished)
+        return;
+
+    mp_Sequence[nCurrentPos++] = '>';
+    if (nCurrentPos == SEQUENCESIZE)
+        nCurrentPos = writeSequence();
+    m_bStartElementFinished = true;
+}
+
+/**
+ * Starts a new output line and indents it.
+ *
+ * A still-open start tag is closed first. Afterwards a line feed followed by
+ * m_nLevel blanks is appended and the position of that line feed is stored
+ * in nLastLineFeedPos.
+ *
+ * @param m_nLevel number of blanks written after the line feed
+ */
+void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel)
+{
+    FinishStartElement();
+
+    if (m_nLevel == 0)
+    {
+        // no indentation requested: a bare line feed is all that is needed
+        nLastLineFeedPos = nCurrentPos;
+        mp_Sequence[nCurrentPos++] = LINEFEED;
+    }
+    else if ((nCurrentPos + m_nLevel + 1) > SEQUENCESIZE)
+    {
+        // Does not fit into the rest of the buffer: assemble the bytes aside
+        // and hand them to AddBytes.
+        const sal_uInt32 nCount(m_nLevel + 1);
+        {
+            std::unique_ptr<sal_Int8[]> pBytes(new sal_Int8[nCount]);
+            pBytes[0] = LINEFEED;
+            memset(&(pBytes[1]), 32, m_nLevel);
+            AddBytes(mp_Sequence, nCurrentPos, pBytes.get(), nCount);
+        }
+        // position of the line feed relative to the (possibly restarted) buffer
+        nLastLineFeedPos = nCurrentPos - nCount;
+    }
+    else
+    {
+        // line feed plus blanks fit: write them straight into the buffer
+        nLastLineFeedPos = nCurrentPos;
+        mp_Sequence[nCurrentPos++] = LINEFEED;
+        memset(&(mp_Sequence[nCurrentPos]), 32, m_nLevel);
+        nCurrentPos += m_nLevel;
+    }
+
+    if (nCurrentPos == SEQUENCESIZE)
+        nCurrentPos = writeSequence();
+}
+
+/**
+ * Appends a string to the output buffer.
+ *
+ * A pending start tag is closed first; the actual conversion is delegated to
+ * convertToXML(), which writes into mp_Sequence at nCurrentPos.
+ *
+ * @param rWriteOutString      text to write
+ * @param bDoNormalization     forwarded to convertToXML()
+ * @param bNormalizeWhitespace forwarded to convertToXML()
+ * @return the result of convertToXML()
+ */
+bool SaxWriterHelper::writeString(const OUString& rWriteOutString, bool bDoNormalization,
+                                  bool bNormalizeWhitespace)
+{
+    FinishStartElement();
+
+    const sal_Unicode* const pChars = rWriteOutString.getStr();
+    const sal_Int32 nChars = rWriteOutString.getLength();
+    return convertToXML(pChars, nChars, bDoNormalization, bNormalizeWhitespace, mp_Sequence,
+                        nCurrentPos);
+}
+
+/**
+ * Writes the XML declaration and a trailing line feed into the output buffer.
+ *
+ * The declaration is appended at the current write position; when it does not
+ * fit into the remaining buffer space it is passed to AddBytes() instead. The
+ * buffer is flushed via writeSequence() whenever it becomes full.
+ */
+void SaxWriterHelper::startDocument()
+{
+    const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
+    // length without the terminating NUL; avoids the size_t -> int narrowing of strlen()
+    const sal_Int32 nLen = static_cast<sal_Int32>(sizeof(pc) - 1);
+    if ((nCurrentPos + nLen) <= SEQUENCESIZE)
+    {
+        // Copy to the current write position (not to the start of the buffer),
+        // matching the bounds check above and the position update below.
+        memcpy(&(mp_Sequence[nCurrentPos]), pc, nLen);
+        nCurrentPos += nLen;
+    }
+    else
+    {
+        AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>(pc), nLen);
+    }
+    OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position");
+    if (nCurrentPos == SEQUENCESIZE)
+        nCurrentPos = writeSequence();
+
+    // the declaration is always followed by a line feed
+    mp_Sequence[nCurrentPos] = LINEFEED;
+    nCurrentPos++;
+    if (nCurrentPos == SEQUENCESIZE)
+        nCurrentPos = writeSequence();
+}
+
+#ifndef NDEBUG
+/// Returns true when c lies within the closed interval [start, end].
+bool inrange(sal_Unicode c, sal_Unicode start, sal_Unicode end)
+{
+    return !(c < start || end < c);
+}
+#endif
+
+/**
+ * Debug-only sanity check for an element or attribute name.
+ *
+ * Compiled out (no-op) when NDEBUG is defined. Otherwise asserts that the name
+ * is not empty, contains at most one colon and consists only of the name
+ * characters listed below.
+ *
+ * @param rName the name to check
+ */
+void CheckValidName(OUString const& rName)
+{
+#ifdef NDEBUG
+    (void)rName;
+#else
+    assert(!rName.isEmpty());
+    bool hasColon(false);
+    const sal_Int32 nNameLen = rName.getLength();
+    for (sal_Int32 nPos = 0; nPos < nNameLen; ++nPos)
+    {
+        auto const c(rName[nPos]);
+        if (c == ':')
+        {
+            // see https://www.w3.org/TR/REC-xml-names/#ns-qualnames
+            SAL_WARN_IF(hasColon, "sax", "only one colon allowed: " << rName);
+            assert(!hasColon && "only one colon allowed");
+            hasColon = true;
+            continue;
+        }
+
+        // https://www.w3.org/TR/xml11/#NT-NameChar
+        // (currently we don't warn about invalid start chars)
+        bool const bIsNameChar
+            = rtl::isAsciiAlphanumeric(c) || c == '_' || c == '-' || c == '.'
+              || inrange(c, 0x00C0, 0x00D6) || inrange(c, 0x00D8, 0x00F6)
+              || inrange(c, 0x00F8, 0x02FF) || inrange(c, 0x0370, 0x037D)
+              || inrange(c, 0x037F, 0x1FFF) || inrange(c, 0x200C, 0x200D)
+              || inrange(c, 0x2070, 0x218F) || inrange(c, 0x2C00, 0x2FEF)
+              || inrange(c, 0x3001, 0xD7FF) || inrange(c, 0xF900, 0xFDCF)
+              || inrange(c, 0xFDF0, 0xFFFD) || c == 0x00B7 || inrange(c, 0x0300, 0x036F)
+              || inrange(c, 0x203F, 0x2040);
+        if (!bIsNameChar)
+        {
+            SAL_WARN("sax", "unexpected character in attribute name: " << rName);
+            assert(!"unexpected character in attribute name");
+        }
+    }
+#endif
+}
+
+/**
+ * Writes the opening part of an element: '<', the name and all attributes.
+ *
+ * The closing '>' is deliberately not written; m_bStartElementFinished is left
+ * false so that a later call can emit either '>' or "/>".
+ *
+ * @param rName    element name
+ * @param xAttribs attribute list; may be an empty reference
+ * @return SAX_ERROR when writing the element name or an attribute name failed,
+ *         SAX_WARNING when only an attribute value failed, SAX_NONE otherwise
+ */
+SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName,
+                                                       const Reference<XAttributeList>& xAttribs)
+{
+    FinishStartElement();
+
+#ifdef DBG_UTIL
+    m_DebugStartedElements.push(rName);
+    ::std::set<OUString> DebugAttributes;
+#endif
+
+    // appends a single byte and flushes the buffer once it is full
+    auto const putByte = [this](char cByte) {
+        mp_Sequence[nCurrentPos] = cByte;
+        nCurrentPos++;
+        if (nCurrentPos == SEQUENCESIZE)
+            nCurrentPos = writeSequence();
+    };
+
+    putByte('<');
+
+    SaxInvalidCharacterError eRet(SAX_NONE);
+    CheckValidName(rName);
+    if (!writeString(rName, false, false))
+        eRet = SAX_ERROR;
+
+    const sal_Int16 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
+    for (sal_Int16 i = 0; i < nAttribCount; i++)
+    {
+        putByte(' ');
+
+        OUString const& rAttrName(xAttribs->getNameByIndex(i));
+#ifdef DBG_UTIL
+        // Well-formedness constraint: Unique Att Spec
+        assert(DebugAttributes.find(rAttrName) == DebugAttributes.end());
+        DebugAttributes.insert(rAttrName);
+#endif
+        CheckValidName(rAttrName);
+        if (!writeString(rAttrName, false, false))
+            eRet = SAX_ERROR;
+
+        putByte('=');
+        putByte('"');
+
+        // a failing value only downgrades to a warning if no error was seen yet
+        bool const bValueWritten = writeString(xAttribs->getValueByIndex(i), true, true);
+        if (!bValueWritten && eRet != SAX_ERROR)
+            eRet = SAX_WARNING;
+
+        putByte('"');
+    }
+
+    // The '>' is not added here, because the element may still turn out to be
+    // empty, in which case "/>" has to be written instead.
+    m_bStartElementFinished = false;
+    return eRet;
+}
+
+/**
+ * Closes a still-open start tag as an empty element by appending "/>".
+ *
+ * @return false when the start tag was already finished (nothing is written),
+ *         true when "/>" has been written
+ */
+bool SaxWriterHelper::FinishEmptyElement()
+{
+    if (m_bStartElementFinished)
+        return false;
+
+    static const char aEmptyTagEnd[] = { '/', '>' };
+    for (const char cByte : aEmptyTagEnd)
+    {
+        mp_Sequence[nCurrentPos++] = cByte;
+        if (nCurrentPos == SEQUENCESIZE)
+            nCurrentPos = writeSequence();
+    }
+
+    m_bStartElementFinished = true;
+    return true;
+}
+
+/**
+ * Writes an end tag "</name>".
+ *
+ * @param rName element name
+ * @return the result of writing the name via writeString()
+ */
+bool SaxWriterHelper::endElement(const OUString& rName)
+{
+    FinishStartElement();
+
+    // appends a single byte and flushes the buffer once it is full
+    auto const putByte = [this](char cByte) {
+        mp_Sequence[nCurrentPos++] = cByte;
+        if (nCurrentPos == SEQUENCESIZE)
+            nCurrentPos = writeSequence();
+    };
+
+    putByte('<');
+    putByte('/');
+
+    CheckValidName(rName);
+    const bool bNameWritten = writeString(rName, false, false);
+
+    putByte('>');
+
+    return bNameWritten;
+}
+
+/**
+ * Flushes whatever is left in the buffer at the end of the document.
+ *
+ * The sequence is shrunk to the number of pending bytes before it is written.
+ * It is not grown back to SEQUENCESIZE afterwards.
+ */
+void SaxWriterHelper::endDocument()
+{
+    if (!(nCurrentPos > 0))
+        return; // nothing pending
+
+    m_Sequence.realloc(nCurrentPos);
+    nCurrentPos = writeSequence();
+    //m_Sequence.realloc(SEQUENCESIZE);
+}
+
+/**
+ * Writes out all pending bytes and restores the buffer to its full size.
+ *
+ * A still-open start tag is closed first so that its '>' is part of the flush.
+ */
+void SaxWriterHelper::clearBuffer()
+{
+    FinishStartElement();
+    if (!(nCurrentPos > 0))
+        return; // buffer is empty, nothing to flush
+
+    // shrink to the used part, write it, then grow back to the working size
+    m_Sequence.realloc(nCurrentPos);
+    nCurrentPos = writeSequence();
+    m_Sequence.realloc(SEQUENCESIZE);
+
+    // The reallocation may have moved the storage; refresh the raw pointer.
+    mp_Sequence = m_Sequence.getArray();
+}
+
+/**
+ * Writes a processing instruction "<?target data?>".
+ *
+ * @param rTarget target of the processing instruction
+ * @param rData   data part, written after a single blank
+ * @return false when writing the target or the data failed, true otherwise
+ */
+bool SaxWriterHelper::processingInstruction(const OUString& rTarget, const OUString& rData)
+{
+    FinishStartElement();
+
+    // appends a single byte and flushes the buffer once it is full
+    auto const putByte = [this](char cByte) {
+        mp_Sequence[nCurrentPos++] = cByte;
+        if (nCurrentPos == SEQUENCESIZE)
+            nCurrentPos = writeSequence();
+    };
+
+    putByte('<');
+    putByte('?');
+
+    const bool bTargetWritten = writeString(rTarget, false, false);
+
+    putByte(' ');
+
+    const bool bDataWritten = writeString(rData, false, false);
+
+    putByte('?');
+    putByte('>');
+
+    return bTargetWritten && bDataWritten;
+}
+
+/**
+ * Writes the CDATA section opener "<![CDATA[".
+ *
+ * A pending start tag is closed first. The opener is copied directly when it
+ * fits into the buffer and passed to AddBytes() otherwise.
+ */
+void SaxWriterHelper::startCDATA()
+{
+    FinishStartElement();
+
+    if ((nCurrentPos + 9) > SEQUENCESIZE)
+    {
+        AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("<![CDATA["), 9);
+    }
+    else
+    {
+        memcpy(&(mp_Sequence[nCurrentPos]), "<![CDATA[", 9);
+        nCurrentPos += 9;
+    }
+
+    if (nCurrentPos == SEQUENCESIZE)
+        nCurrentPos = writeSequence();
+}
+
+/**
+ * Writes the CDATA section terminator "]]>".
+ *
+ * The terminator is copied directly when it fits into the buffer and passed
+ * to AddBytes() otherwise.
+ */
+void SaxWriterHelper::endCDATA()
+{
+    FinishStartElement();
+
+    if ((nCurrentPos + 3) > SEQUENCESIZE)
+    {
+        AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("]]>"), 3);
+    }
+    else
+    {
+        memcpy(&(mp_Sequence[nCurrentPos]), "]]>", 3);
+        nCurrentPos += 3;
+    }
+
+    if (nCurrentPos == SEQUENCESIZE)
+        nCurrentPos = writeSequence();
+}
+
+/**
+ * Writes an XML comment: "<!--", the comment text, "-->".
+ *
+ * @param rComment comment text, written without normalization
+ * @return the result of writing the text via writeString()
+ */
+bool SaxWriterHelper::comment(const OUString& rComment)
+{
+    FinishStartElement();
+
+    // appends a single byte and flushes the buffer once it is full
+    auto const putByte = [this](char cByte) {
+        mp_Sequence[nCurrentPos++] = cByte;
+        if (nCurrentPos == SEQUENCESIZE)
+            nCurrentPos = writeSequence();
+    };
+
+    static const char aOpen[] = { '<', '!', '-', '-' };
+    for (const char cByte : aOpen)
+        putByte(cByte);
+
+    const bool bTextWritten = writeString(rComment, false, false);
+
+    static const char aClose[] = { '-', '-', '>' };
+    for (const char cByte : aClose)
+        putByte(cByte);
+
+    return bTextWritten;
+}
+
+/**
+ * Computes how many bytes a string is counted as when written out.
+ *
+ * Nothing is written; the function only adds up per-character byte counts.
+ *
+ * @param rStr                 string to measure
+ * @param bDoNormalization     when true, the ASCII characters handled in the
+ *                             switch below are counted with the length of
+ *                             their escaped form and custom replacements are
+ *                             looked up
+ * @param bNormalizeWhitespace when true (and bDoNormalization is set), line
+ *                             feed and tab are counted as 6 bytes, else as 1
+ * @return the accumulated byte count
+ */
+sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
+ bool bNormalizeWhitespace)
+{
+ sal_Int32 nOutputLength = 0;
+ // holds a pending high surrogate until the following code unit is seen
+ sal_uInt32 nSurrogate = 0;
+
+ const sal_Unicode* pStr = rStr.getStr();
+ sal_Int32 nStrLen = rStr.getLength();
+ for (sal_Int32 i = 0; i < nStrLen; i++)
+ {
+ sal_uInt16 c = pStr[i];
+ // 7-bit characters accepted by IsInvalidChar(): one byte, unless escaped
+ if (!IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F))
+ {
+ if (bDoNormalization)
+ {
+ switch (c)
+ {
+ case '&': // counted as &amp;
+ nOutputLength += 5;
+ break;
+ case '<': // &lt;
+ case '>': // &gt;
+ nOutputLength += 4;
+ break;
+ case '\'': // &apos;
+ case '"': // &quot;
+ case 13: // &#x0d;
+ nOutputLength += 6;
+ break;
+
+ case 10: // &#x0a;
+ case 9: // &#x09;
+ if (bNormalizeWhitespace)
+ {
+ nOutputLength += 6;
+ }
+ else
+ {
+ nOutputLength++;
+ }
+ break;
+ default:
+ nOutputLength++;
+ }
+ }
+ else
+ {
+ nOutputLength++;
+ }
+ }
+ else
+ {
+ // Everything else: 0, characters rejected by IsInvalidChar() and all
+ // code units above 0x7F end up here.
+ // Deal with replacements
+ if (bDoNormalization && !m_Replacements.empty())
+ {
+ // search
+ const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i);
+
+ if (it != nullptr)
+ {
+ // count the UTF-8 length of the entity name and skip the
+ // replaced text (the loop increment accounts for the "- 1")
+ nOutputLength
+ += ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8).getLength();
+ i += it->replacement.getLength() - 1;
+ continue;
+ }
+ }
+
+ // Deal with other unicode cases
+ if (rtl::isHighSurrogate(c))
+ {
+ // save surrogate
+ nSurrogate = c;
+ }
+ else if (rtl::isLowSurrogate(c))
+ {
+ // 2nd surrogate: counted as a 4 byte UTF-8 sequence, but only if a
+ // high surrogate is pending; a lone low surrogate adds nothing
+ if (nSurrogate)
+ nOutputLength += 4;
+ nSurrogate = 0;
+ }
+ else if (c > 0x07FF)
+ {
+ nOutputLength += 3;
+ }
+ else
+ {
+ nOutputLength += 2;
+ }
+ }
+
+ // surrogate processing: drop a pending high surrogate unless the current
+ // code unit is itself a high surrogate
+ if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
+ nSurrogate = 0;
+ }
+
+ return nOutputLength;
+}
+
+/**
+ * Looks for a custom replacement whose text matches the start of a buffer.
+ *
+ * Entries longer than the remaining input are skipped. The scan stops early
+ * with "not found" as soon as an entry compares greater than the input.
+ * NOTE(review): this early exit presumably relies on m_Replacements being
+ * sorted - confirm against setCustomEntityNames().
+ *
+ * @param pStr    start of the text to match
+ * @param nStrLen number of code units available at pStr
+ * @return the matching entry, or nullptr when there is none
+ */
+const ReplacementPair* SaxWriterHelper::findXMLReplacement(const sal_Unicode* pStr,
+                                                           sal_Int32 nStrLen)
+{
+    for (const ReplacementPair& rEntry : m_Replacements)
+    {
+        const sal_Int32 nCandidateLen = rEntry.replacement.getLength();
+        if (nCandidateLen > nStrLen)
+            continue; // cannot match, not enough input left
+
+        const sal_Int32 nOrder
+            = rEntry.replacement.compareTo(std::u16string_view(pStr, nCandidateLen));
+        if (nOrder == 0)
+            return &rEntry;
+        if (nOrder > 0)
+            return nullptr;
+    }
+    return nullptr;
+}
+
+/**
+ * UNO component implementing XWriter and XServiceInfo.
+ *
+ * The SAX callbacks check the writer state (document started, inside CDATA,
+ * nesting level, line break flags) and delegate the actual serialization to a
+ * SaxWriterHelper that is created in setOutputStream().
+ */
+class SAXWriter : public WeakImplHelper<XWriter, XServiceInfo>
+{
+public:
+ SAXWriter()
+ : m_bDocStarted(false)
+ , m_bIsCDATA(false)
+ , m_bForceLineBreak(false)
+ , m_bAllowLineBreak(false)
+ , m_nLevel(0)
+ {
+ }
+
+public: // XActiveDataSource
+ /**
+ * Sets the stream to write to.
+ *
+ * Passing the stream that is already set while a document is in progress
+ * only flushes the helper's buffer. In every other case a new helper is
+ * created for the stream and the document state is reset.
+ * A SAXException raised on the way is rethrown as
+ * WrappedTargetRuntimeException.
+ */
+ virtual void SAL_CALL setOutputStream(const Reference<XOutputStream>& aStream) override
+ {
+ try
+ {
+ // temporary: set same stream again to clear buffer
+ if (m_out == aStream && m_pSaxWriterHelper && m_bDocStarted)
+ m_pSaxWriterHelper->clearBuffer();
+ else
+ {
+ m_out = aStream;
+ m_pSaxWriterHelper.reset(new SaxWriterHelper(m_out));
+ m_bDocStarted = false;
+ m_nLevel = 0;
+ m_bIsCDATA = false;
+ }
+ }
+ catch (const SAXException& e)
+ {
+ throw css::lang::WrappedTargetRuntimeException(
+ e.Message, static_cast<OWeakObject*>(this), e.WrappedException);
+ }
+ }
+ /// Returns the stream passed to the last setOutputStream() call.
+ virtual Reference<XOutputStream> SAL_CALL getOutputStream() override { return m_out; }
+
+public: // XDocumentHandler
+ virtual void SAL_CALL startDocument() override;
+
+ virtual void SAL_CALL endDocument() override;
+
+ virtual void SAL_CALL startElement(const OUString& aName,
+ const Reference<XAttributeList>& xAttribs) override;
+
+ virtual void SAL_CALL endElement(const OUString& aName) override;
+
+ virtual void SAL_CALL characters(const OUString& aChars) override;
+
+ virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) override;
+ virtual void SAL_CALL processingInstruction(const OUString& aTarget,
+ const OUString& aData) override;
+ virtual void SAL_CALL setDocumentLocator(const Reference<XLocator>& xLocator) override;
+ virtual void SAL_CALL setCustomEntityNames(
+ const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>&
+ replacements) override;
+
+public: // XExtendedDocumentHandler
+ virtual void SAL_CALL startCDATA() override;
+ virtual void SAL_CALL endCDATA() override;
+ virtual void SAL_CALL comment(const OUString& sComment) override;
+ virtual void SAL_CALL unknown(const OUString& sString) override;
+ virtual void SAL_CALL allowLineBreak() override;
+
+public: // XServiceInfo
+ OUString SAL_CALL getImplementationName() override;
+ Sequence<OUString> SAL_CALL getSupportedServiceNames() override;
+ sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
+
+private:
+ /// Decides whether a line break is due; returns the indent width or -1.
+ sal_Int32 getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept;
+
+ Reference<XOutputStream> m_out; ///< target stream
+ std::unique_ptr<SaxWriterHelper> m_pSaxWriterHelper; ///< null until setOutputStream()
+
+ // Status information
+ bool m_bDocStarted : 1; ///< set by startDocument()
+ bool m_bIsCDATA : 1; ///< true between startCDATA() and endCDATA()
+ bool m_bForceLineBreak : 1; ///< set by ignorableWhitespace()
+ bool m_bAllowLineBreak : 1; ///< set by allowLineBreak()
+ sal_Int32 m_nLevel; ///< element nesting depth, also used as indent width
+};
+
+/**
+ * Decides whether the next output should start on a new, indented line.
+ *
+ * A break is due when one was forced, or when breaks are allowed and the
+ * given length added to the helper's last column count exceeds
+ * MAXCOLUMNCOUNT. Both line break flags are cleared in any case.
+ *
+ * @param nFirstLineBreakOccurrence length of the output up to its first line break
+ * @return the current nesting level when a break is due, -1 otherwise
+ */
+sal_Int32 SAXWriter::getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept
+{
+    bool bBreakLine = false;
+    if (m_pSaxWriterHelper)
+    {
+        bBreakLine = m_bForceLineBreak
+                     || (m_bAllowLineBreak
+                         && ((nFirstLineBreakOccurrence
+                              + m_pSaxWriterHelper->GetLastColumnCount())
+                             > MAXCOLUMNCOUNT));
+    }
+
+    // the flags only apply to the very next piece of output
+    m_bForceLineBreak = false;
+    m_bAllowLineBreak = false;
+
+    return bBreakLine ? m_nLevel : -1;
+}
+
+/// Returns true when the first code unit at p is a blank (' ').
+bool isFirstCharWhitespace(const sal_Unicode* p) noexcept
+{
+    return p[0] == ' ';
+}
+
+// XServiceInfo
+/// Returns the implementation name of this component.
+OUString SAXWriter::getImplementationName()
+{
+    return "com.sun.star.extensions.xml.sax.Writer";
+}
+
+// XServiceInfo
+/// Delegates the service lookup to cppu::supportsService().
+sal_Bool SAXWriter::supportsService(const OUString& ServiceName)
+{
+    const bool bSupported = cppu::supportsService(this, ServiceName);
+    return bSupported;
+}
+
+// XServiceInfo
+/// Returns the single service name this component supports.
+Sequence<OUString> SAXWriter::getSupportedServiceNames()
+{
+    return Sequence<OUString>{ "com.sun.star.xml.sax.Writer" };
+}
+
+/**
+ * Begins a document and writes the XML declaration.
+ *
+ * @throws SAXException when a document was already started, or when no output
+ *         stream or helper is available
+ */
+void SAXWriter::startDocument()
+{
+    const bool bCanStart = !m_bDocStarted && m_out.is() && m_pSaxWriterHelper;
+    if (!bCanStart)
+        throw SAXException();
+
+    m_bDocStarted = true;
+    m_pSaxWriterHelper->startDocument();
+}
+
+/**
+ * Finishes the document: flushes the helper and closes the output stream.
+ *
+ * @throws SAXException when no document was started, when elements are still
+ *         open, or when closing the stream raised an IOException (which is
+ *         passed on as the wrapped exception)
+ */
+void SAXWriter::endDocument()
+{
+    if (!m_bDocStarted)
+    {
+        throw SAXException("endDocument called before startDocument", Reference<XInterface>(),
+                           Any());
+    }
+    if (m_nLevel != 0)
+    {
+        // at least one element has not been closed
+        throw SAXException("unexpected end of document", Reference<XInterface>(), Any());
+    }
+
+    m_pSaxWriterHelper->endDocument();
+
+    try
+    {
+        m_out->closeOutput();
+    }
+    catch (const IOException&)
+    {
+        css::uno::Any aCaught = cppu::getCaughtException();
+        throw SAXException("IO exception during closing the IO Stream", Reference<XInterface>(),
+                           aCaught);
+    }
+}
+
+/**
+ * Writes the start tag of an element and raises the nesting level.
+ *
+ * When line breaks are allowed, the length of the complete start tag is
+ * computed first so that getIndentPrefixLength() can decide about a break.
+ *
+ * @param aName    element name
+ * @param xAttribs attribute list; may be an empty reference
+ * @throws SAXInvalidCharacterException when only an attribute value contained
+ *         an invalid character
+ * @throws SAXException when called before startDocument, inside a CDATA
+ *         section, or when a name contained an invalid character
+ */
+void SAXWriter::startElement(const OUString& aName, const Reference<XAttributeList>& xAttribs)
+{
+    if (!m_bDocStarted)
+        throw SAXException("startElement called before startDocument", Reference<XInterface>(),
+                           Any());
+    if (m_bIsCDATA)
+        throw SAXException("startElement call not allowed with CDATA sections",
+                           Reference<XInterface>(), Any());
+
+    sal_Int32 nLength(0);
+    if (m_bAllowLineBreak)
+    {
+        const sal_Int16 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
+
+        // "<" + tag name
+        nLength = 1 + m_pSaxWriterHelper->calcXMLByteLength(aName, false, false);
+
+        for (sal_Int16 n = 0; n < nAttribCount; n++)
+        {
+            // " " + attribute name
+            nLength += 1;
+            nLength += m_pSaxWriterHelper->calcXMLByteLength(xAttribs->getNameByIndex(n), false,
+                                                             false);
+            // =" + attribute value + "
+            nLength += 2;
+            nLength += m_pSaxWriterHelper->calcXMLByteLength(xAttribs->getValueByIndex(n), true,
+                                                             true);
+            nLength += 1;
+        }
+
+        nLength += 1; // '>'
+    }
+
+    // Is there a new indentation necessary ?
+    const sal_Int32 nPrefix(getIndentPrefixLength(nLength));
+    if (nPrefix >= 0)
+        m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+    const SaxInvalidCharacterError eRet(m_pSaxWriterHelper->startElement(aName, xAttribs));
+
+    // the level is raised even when an exception is about to be thrown
+    m_nLevel++;
+
+    switch (eRet)
+    {
+        case SAX_WARNING:
+        {
+            SAXInvalidCharacterException except;
+            except.Message = "Invalid character during XML-Export in an attribute value";
+            throw except;
+        }
+        case SAX_ERROR:
+            throw SAXException("Invalid character during XML-Export", Reference<XInterface>(),
+                               Any());
+        default:
+            break;
+    }
+}
+
+/**
+ * Closes the current element.
+ *
+ * If the start tag is still open the element is written as an empty element
+ * ("/>"); otherwise a regular end tag is written, preceded by an indentation
+ * when a line break is due.
+ *
+ * @param aName element name
+ * @throws SAXException when called before startDocument, when there is no
+ *         open element, or when the name contained an invalid character
+ */
+void SAXWriter::endElement(const OUString& aName)
+{
+    if (!m_bDocStarted)
+        throw SAXException();
+
+    // the level is lowered before the underflow check
+    if (--m_nLevel < 0)
+        throw SAXException();
+
+    // check here because Helper's endElement is not always called
+#ifdef DBG_UTIL
+    assert(!m_pSaxWriterHelper->m_DebugStartedElements.empty());
+    // Well-formedness constraint: Element Type Match
+    assert(aName == m_pSaxWriterHelper->m_DebugStartedElements.top());
+    m_pSaxWriterHelper->m_DebugStartedElements.pop();
+#endif
+
+    bool bNameWritten = true;
+    if (m_pSaxWriterHelper->FinishEmptyElement())
+    {
+        m_bForceLineBreak = false;
+    }
+    else
+    {
+        // only ascii chars allowed
+        const sal_Int32 nLength
+            = m_bAllowLineBreak ? 3 + m_pSaxWriterHelper->calcXMLByteLength(aName, false, false)
+                                : 0;
+        const sal_Int32 nPrefix = getIndentPrefixLength(nLength);
+        if (nPrefix >= 0)
+            m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+        bNameWritten = m_pSaxWriterHelper->endElement(aName);
+    }
+
+    if (!bNameWritten)
+        throw SAXException("Invalid character during XML-Export", Reference<XInterface>(),
+                           Any());
+}
+
+/**
+ * Writes character data.
+ *
+ * Inside a CDATA section the text is written without normalization. Otherwise
+ * it is normalized and, when a line break is due, preceded by an indentation.
+ * If the text itself starts with a blank, that blank takes the place of one
+ * indentation column.
+ *
+ * @param aChars the text to write; an empty string writes nothing
+ * @throws SAXException when called before startDocument
+ * @throws SAXInvalidCharacterException when the text contained an invalid character
+ */
+void SAXWriter::characters(const OUString& aChars)
+{
+    if (!m_bDocStarted)
+    {
+        SAXException except;
+        except.Message = "characters method called before startDocument";
+        throw except;
+    }
+
+    bool bThrowException(false);
+    if (!aChars.isEmpty())
+    {
+        if (m_bIsCDATA)
+            bThrowException = !m_pSaxWriterHelper->writeString(aChars, false, false);
+        else
+        {
+            // Note : nFirstLineBreakOccurrence is not exact, because we don't know, how
+            //        many 2 and 3 byte chars are inbetween. However this whole stuff
+            //        is eitherway for pretty printing only, so it does not need to be exact.
+            sal_Int32 nLength(0);
+            sal_Int32 nIndentPrefix(-1);
+            if (m_bAllowLineBreak)
+            {
+                // returns position of first ascii 10 within the string, -1 when no 10 in string.
+                sal_Int32 nFirstLineBreakOccurrence = aChars.indexOf(LINEFEED);
+
+                nLength = m_pSaxWriterHelper->calcXMLByteLength(aChars, !m_bIsCDATA, false);
+                nIndentPrefix = getIndentPrefixLength(
+                    nFirstLineBreakOccurrence >= 0 ? nFirstLineBreakOccurrence : nLength);
+            }
+            else
+                nIndentPrefix = getIndentPrefixLength(nLength);
+
+            // insert indentation
+            if (nIndentPrefix >= 0)
+            {
+                // The leading blank of the text replaces one indentation column.
+                // Only shorten a non-zero prefix: insertIndentation() takes an
+                // unsigned level, so passing 0 - 1 would wrap around to a huge
+                // value and overrun the output buffer.
+                if (nIndentPrefix > 0 && isFirstCharWhitespace(aChars.getStr()))
+                    m_pSaxWriterHelper->insertIndentation(nIndentPrefix - 1);
+                else
+                    m_pSaxWriterHelper->insertIndentation(nIndentPrefix);
+            }
+            bThrowException = !m_pSaxWriterHelper->writeString(aChars, true, false);
+        }
+    }
+    if (bThrowException)
+    {
+        SAXInvalidCharacterException except;
+        except.Message = "Invalid character during XML-Export";
+        throw except;
+    }
+}
+
+/**
+ * Requests a line break before the next piece of output.
+ *
+ * The whitespace itself is not written; only the force-line-break flag is set.
+ *
+ * @throws SAXException when called before startDocument
+ */
+void SAXWriter::ignorableWhitespace(const OUString&)
+{
+    if (m_bDocStarted)
+    {
+        m_bForceLineBreak = true;
+        return;
+    }
+
+    throw SAXException();
+}
+
+/**
+ * Writes a processing instruction, indented when a line break is due.
+ *
+ * @param aTarget target of the processing instruction
+ * @param aData   data of the processing instruction
+ * @throws SAXException when called before startDocument, inside a CDATA
+ *         section, or when target or data contained an invalid character
+ */
+void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData)
+{
+    if (!m_bDocStarted || m_bIsCDATA)
+        throw SAXException();
+
+    sal_Int32 nLength(0);
+    if (m_bAllowLineBreak)
+    {
+        // "<?" + target + " " + data + "?>"
+        nLength = 2;
+        nLength += m_pSaxWriterHelper->calcXMLByteLength(aTarget, false, false);
+        nLength += 1;
+        nLength += m_pSaxWriterHelper->calcXMLByteLength(aData, false, false);
+        nLength += 2;
+    }
+
+    const sal_Int32 nPrefix = getIndentPrefixLength(nLength);
+    if (nPrefix >= 0)
+        m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+    const bool bWritten = m_pSaxWriterHelper->processingInstruction(aTarget, aData);
+    if (!bWritten)
+        throw SAXException("Invalid character during XML-Export", Reference<XInterface>(),
+                           Any());
+}
+
+/// The locator is ignored; this implementation does nothing with it.
+void SAXWriter::setDocumentLocator(const Reference<XLocator>&)
+{
+    // intentionally empty
+}
+
+/**
+ * Passes a list of custom entity replacements on to the writer helper.
+ *
+ * The helper only exists after setOutputStream() has been called. A call made
+ * before that has nothing to store the replacements in; it is logged and
+ * ignored instead of dereferencing a null pointer.
+ *
+ * @param replacements pairs handed to SaxWriterHelper::setCustomEntityNames()
+ */
+void SAXWriter::setCustomEntityNames(
+    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
+{
+    if (!m_pSaxWriterHelper)
+    {
+        SAL_WARN("sax", "setCustomEntityNames called before setOutputStream, ignored");
+        return;
+    }
+    m_pSaxWriterHelper->setCustomEntityNames(replacements);
+}
+
+/**
+ * Opens a CDATA section, indented when a line break is due.
+ *
+ * @throws SAXException when called before startDocument or while a CDATA
+ *         section is already open
+ */
+void SAXWriter::startCDATA()
+{
+    const bool bAllowed = m_bDocStarted && !m_bIsCDATA;
+    if (!bAllowed)
+        throw SAXException();
+
+    // 9 is the length of "<![CDATA["
+    const sal_Int32 nPrefix = getIndentPrefixLength(9);
+    if (nPrefix >= 0)
+        m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+    m_pSaxWriterHelper->startCDATA();
+    m_bIsCDATA = true;
+}
+
+/**
+ * Closes the current CDATA section, indented when a line break is due.
+ *
+ * @throws SAXException when called before startDocument or without an open
+ *         CDATA section
+ */
+void SAXWriter::endCDATA()
+{
+    const bool bInsideCDATA = m_bDocStarted && m_bIsCDATA;
+    if (!bInsideCDATA)
+        throw SAXException("endCDATA was called without startCDATA", Reference<XInterface>(),
+                           Any());
+
+    // 3 is the length of "]]>"
+    const sal_Int32 nPrefix = getIndentPrefixLength(3);
+    if (nPrefix >= 0)
+        m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+    m_pSaxWriterHelper->endCDATA();
+    m_bIsCDATA = false;
+}
+
+/**
+ * Writes an XML comment, indented when a line break is due.
+ *
+ * @param sComment comment text
+ * @throws SAXException when called before startDocument, inside a CDATA
+ *         section, or when the text contained an invalid character
+ */
+void SAXWriter::comment(const OUString& sComment)
+{
+    if (!m_bDocStarted || m_bIsCDATA)
+        throw SAXException();
+
+    // "<!--" (4) + text + "-->" (3); only needed when breaks are allowed
+    const sal_Int32 nLength
+        = m_bAllowLineBreak
+              ? 4 + m_pSaxWriterHelper->calcXMLByteLength(sComment, false, false) + 3
+              : 0;
+
+    const sal_Int32 nPrefix = getIndentPrefixLength(nLength);
+    if (nPrefix >= 0)
+        m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+    const bool bWritten = m_pSaxWriterHelper->comment(sComment);
+    if (!bWritten)
+        throw SAXException("Invalid character during XML-Export", Reference<XInterface>(),
+                           Any());
+}
+
+/**
+ * Allows a line break before the next piece of output.
+ *
+ * @throws SAXException when called before startDocument or when a line break
+ *         is already allowed
+ */
+void SAXWriter::allowLineBreak()
+{
+    const bool bCanAllow = m_bDocStarted && !m_bAllowLineBreak;
+    if (!bCanAllow)
+        throw SAXException();
+
+    m_bAllowLineBreak = true;
+}
+
+/**
+ * Writes an arbitrary string without normalization.
+ *
+ * Strings starting with "<?xml" are silently dropped.
+ *
+ * @param sString text to write
+ * @throws SAXException when called before startDocument, inside a CDATA
+ *         section, or when the text contained an invalid character
+ */
+void SAXWriter::unknown(const OUString& sString)
+{
+    if (!m_bDocStarted || m_bIsCDATA)
+        throw SAXException();
+
+    if (sString.startsWith("<?xml"))
+        return;
+
+    const sal_Int32 nLength
+        = m_bAllowLineBreak ? m_pSaxWriterHelper->calcXMLByteLength(sString, false, false) : 0;
+
+    const sal_Int32 nPrefix = getIndentPrefixLength(nLength);
+    if (nPrefix >= 0)
+        m_pSaxWriterHelper->insertIndentation(nPrefix);
+
+    const bool bWritten = m_pSaxWriterHelper->writeString(sString, false, false);
+    if (!bWritten)
+        throw SAXException("Invalid character during XML-Export", Reference<XInterface>(),
+                           Any());
+}
+
+} // namespace
+
+/// Component factory entry point: creates a new SAXWriter and returns it acquired.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
+com_sun_star_extensions_xml_sax_Writer_get_implementation(css::uno::XComponentContext*,
+                                                          css::uno::Sequence<css::uno::Any> const&)
+{
+    SAXWriter* const pWriter = new SAXWriter;
+    return cppu::acquire(pWriter);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/expatwrap/xml2utf.cxx b/sax/source/expatwrap/xml2utf.cxx
new file mode 100644
index 000000000..1f044571d
--- /dev/null
+++ b/sax/source/expatwrap/xml2utf.cxx
@@ -0,0 +1,519 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#include <string.h>
+
+#include <algorithm>
+
+#include <sal/types.h>
+
+#include <rtl/textenc.h>
+#include <rtl/tencinfo.h>
+#include <com/sun/star/io/NotConnectedException.hpp>
+#include <com/sun/star/io/XInputStream.hpp>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::io;
+
+
+#include <xml2utf.hxx>
+#include <memory>
+
+namespace sax_expatwrap {
+
+/**
+ * Reads the next chunk from the input stream and converts it to UTF-8.
+ *
+ * On the first chunk(s) the encoding is detected. Data is accumulated until
+ * isEncodingRecognizable() is satisfied or the stream ends; afterwards the
+ * encoding declaration is removed from the converted data.
+ *
+ * @param seq        receives the (converted) bytes
+ * @param nMaxToRead maximum number of bytes to request from the stream; raised
+ *                   to at least 512 while the encoding is still undetermined
+ * @return number of bytes now in seq; 0 at end of input
+ * @throws NotConnectedException when no input stream is set
+ */
+sal_Int32 XMLFile2UTFConverter::readAndConvert( Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
+{
+    if( ! m_in.is() ) {
+        throw NotConnectedException();
+    }
+    if( ! m_bStarted ) {
+        // it should be possible to find the encoding attribute
+        // within the first 512 bytes == 128 chars in UCS-4
+        nMaxToRead = ::std::max( sal_Int32(512) , nMaxToRead );
+    }
+
+    sal_Int32 nRead;
+    Sequence< sal_Int8 > seqStart;
+    while( true )
+    {
+        nRead = m_in->readSomeBytes( seq , nMaxToRead );
+
+        if( nRead + seqStart.getLength())
+        {
+            // if nRead is 0, the file is already eof.
+            if( ! m_bStarted )
+            {
+                // ensure that enough data is available to parse encoding
+                if( seqStart.hasElements() )
+                {
+                    if( nRead )
+                    {
+                        // prefix with what we had so far.
+                        sal_Int32 nLength = seq.getLength();
+                        seq.realloc( seqStart.getLength() + nLength );
+
+                        memmove (seq.getArray() + seqStart.getLength(),
+                                 seq.getConstArray(),
+                                 nLength);
+                        memcpy  (seq.getArray(),
+                                 seqStart.getConstArray(),
+                                 seqStart.getLength());
+                    }
+                    else
+                    {
+                        // End of input while still waiting for more header
+                        // data: hand back the bytes buffered so far instead of
+                        // dropping them (e.g. a document shorter than 8 bytes).
+                        seq = seqStart;
+                    }
+                }
+
+                // autodetection with the first bytes; at end of input no more
+                // data can arrive, so work with what is there
+                if( nRead && ! isEncodingRecognizable( seq ) )
+                {
+                    // remember what we have so far.
+                    seqStart = seq;
+
+                    // read more !
+                    continue;
+                }
+                if( scanForEncoding( seq ) || !m_sEncoding.isEmpty() ) {
+                    // initialize decoding
+                    initializeDecoding();
+                }
+                seqStart = Sequence < sal_Int8 > ();
+            }
+
+            // do the encoding
+            if( m_pText2Unicode && m_pUnicode2Text &&
+                m_pText2Unicode->canContinue() ) {
+
+                Sequence<sal_Unicode> seqUnicode = m_pText2Unicode->convert( seq );
+                seq = m_pUnicode2Text->convert( seqUnicode.getConstArray(), seqUnicode.getLength() );
+            }
+
+            if( ! m_bStarted )
+            {
+                // it must now be ensured, that no encoding attribute exist anymore
+                // ( otherwise the expat-Parser will crash )
+                // This must be done after decoding !
+                // ( e.g. Files decoded in ucs-4 cannot be read properly )
+                m_bStarted = true;
+                removeEncoding( seq );
+            }
+            nRead = seq.getLength();
+        }
+
+        break;
+    }
+    return nRead;
+}
+
+/**
+ * Cuts the encoding pseudo-attribute out of a leading XML declaration.
+ *
+ * Only sequences that start with "<?xml" are touched, and only the first line
+ * is searched. Everything from " encoding" up to and including the closing
+ * quote of its value is removed in place.
+ *
+ * NOTE(review): when no single quote follows " encoding", indexOf returns -1,
+ * which compares less than the position of the double quote, so the
+ * single-quote branch is taken and a double-quoted value is not matched.
+ * scanForEncoding() uses the same test - confirm whether this is intended
+ * before changing either of them.
+ *
+ * @param seq byte sequence to modify
+ */
+void XMLFile2UTFConverter::removeEncoding( Sequence<sal_Int8> &seq )
+{
+    const sal_Int8 *pBytes = seq.getArray();
+    const bool bHasXmlDecl
+        = seq.getLength() >= 5
+          && strncmp(reinterpret_cast<const char *>(pBytes), "<?xml", 5) == 0;
+    if (!bHasXmlDecl)
+        return;
+
+    // restrict the search to the first line
+    OString aLine( reinterpret_cast<char const *>(pBytes), seq.getLength() );
+    const int nLineEnd = aLine.indexOf( '\n' );
+    if( nLineEnd >= 0 )
+        aLine = aLine.copy( 0 , nLineEnd );
+
+    const int nFound = aLine.indexOf( " encoding" );
+    if( nFound < 0 )
+        return;
+
+    // locate the quotes around the value
+    int nStart = aLine.indexOf( "\"" , nFound );
+    int nStop;
+    const int nApostrophe = aLine.indexOf( "'" , nFound );
+    if( nStart < 0 || nApostrophe < nStart )
+    {
+        nStart = nApostrophe;
+        nStop = aLine.indexOf( "'" , nStart + 1 );
+    }
+    else
+    {
+        nStop = aLine.indexOf( "\"" , nStart + 1 );
+    }
+
+    const bool bValueFound = nStart >= 0 && nStop >= 0 && nStart + 1 < nStop;
+    if( !bValueFound )
+        return;
+
+    // remove encoding tag from file: close the gap, then shrink
+    const sal_Int32 nRemoved = nStop + 1 - nFound;
+    sal_Int8 *pData = seq.getArray();
+    memmove( pData + nFound , pData + nStop + 1 , seq.getLength() - nStop - 1 );
+    seq.realloc( seq.getLength() - nRemoved );
+}
+
+/**
+ * Checks whether enough data has been accumulated to recognize the encoding.
+ *
+ * At least 8 bytes are required. If the data looks like it starts with a
+ * "<?" tag (as plain bytes, or with '<' and '?' spread over wider code units),
+ * a '>' byte must also be present in the buffer.
+ *
+ * @param seq bytes read so far
+ * @return true when detection can proceed, false when more data is needed
+ */
+bool XMLFile2UTFConverter::isEncodingRecognizable( const Sequence< sal_Int8 > &seq)
+{
+    if( seq.getLength() < 8 ) {
+        // no recognition possible, when less than 8 bytes are available
+        return false;
+    }
+
+    const sal_Int8 *p = seq.getConstArray();
+
+    // plain "<?xml" at the very beginning
+    const bool bByteDecl = strncmp( reinterpret_cast<const char *>(p), "<?xml", 5 ) == 0;
+    // '<' and '?' on even offsets (check for utf-16)
+    const bool bEvenOffsets = ( '<' == p[0] || '<' == p[2] ) &&
+                              ( '?' == p[4] || '?' == p[6] );
+    // '<' and '?' on odd offsets
+    const bool bOddOffsets = ( '<' == p[1] || '<' == p[3] ) &&
+                             ( '?' == p[5] || '?' == p[7] );
+
+    if( !( bByteDecl || bEvenOffsets || bOddOffsets ) )
+    {
+        // No <? tag in front, no need for a bigger buffer
+        return true;
+    }
+
+    // the tag has to be closed within the buffer
+    return std::find(seq.begin(), seq.end(), '>') != seq.end();
+}
+
+/**
+ * Determines the encoding of the data from its first bytes.
+ *
+ * Recognized are an XML declaration with an encoding attribute, UTF-16 and
+ * UTF-8 byte order marks, UTF-16 without byte order mark (one is inserted) and
+ * UCS-4. The detected name is stored in m_sEncoding. A UTF-8 byte order mark
+ * is removed from seq.
+ *
+ * @param seq first bytes of the file; may be modified as described above
+ * @return false when fewer than 4 bytes are available or none of the patterns
+ *         matched, true otherwise (also for an XML declaration that carries
+ *         no encoding attribute)
+ */
+bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq )
+{
+ const sal_uInt8 *pSource = reinterpret_cast<const sal_uInt8*>( seq.getConstArray() );
+ bool bReturn = true;
+
+ if( seq.getLength() < 4 ) {
+ // no recognition possible, when less than 4 bytes are available
+ return false;
+ }
+
+ // first level : detect possible file formats
+ if (seq.getLength() >= 5 && !strncmp(reinterpret_cast<const char *>(pSource), "<?xml", 5)) {
+ // scan for encoding
+ OString str( reinterpret_cast<const char *>(pSource), seq.getLength() );
+
+ // cut sequence to first line break
+ //find first line break;
+ int nMax = str.indexOf( 10 );
+ if( nMax >= 0 )
+ {
+ str = str.copy( 0 , nMax );
+ }
+
+ int nFound = str.indexOf( " encoding" );
+ if( nFound >= 0 ) {
+ int nStop;
+ int nStart = str.indexOf( "\"" , nFound );
+ // NOTE(review): if no single quote follows, indexOf returns -1, which is
+ // less than nStart, so the single-quote branch is taken and a
+ // double-quoted value is not matched. removeEncoding() uses the same
+ // test - confirm whether this is intended.
+ if( nStart < 0 || str.indexOf( "'" , nFound ) < nStart )
+ {
+ nStart = str.indexOf( "'" , nFound );
+ nStop = str.indexOf( "'" , nStart +1 );
+ }
+ else
+ {
+ nStop = str.indexOf( "\"" , nStart +1);
+ }
+ if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop )
+ {
+ // encoding found finally
+ m_sEncoding = str.copy( nStart+1 , nStop - nStart - 1 );
+ }
+ }
+ }
+ else if( 0xFE == pSource[0] &&
+ 0xFF == pSource[1] ) {
+ // UTF-16 big endian
+ // conversion is done so that encoding information can be easily extracted
+ m_sEncoding = "utf-16";
+ }
+ else if( 0xFF == pSource[0] &&
+ 0xFE == pSource[1] ) {
+ // UTF-16 little endian
+ // conversion is done so that encoding information can be easily extracted
+ m_sEncoding = "utf-16";
+ }
+ else if( 0x00 == pSource[0] && 0x3c == pSource[1] && 0x00 == pSource[2] && 0x3f == pSource[3] ) {
+ // UTF-16 big endian without byte order mark (this is (strictly speaking) an error.)
+ // The byte order mark is simply added
+
+ // grow by two bytes, shift the data back and put the mark in front
+ seq.realloc( seq.getLength() + 2 );
+ memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 );
+ reinterpret_cast<sal_uInt8*>(seq.getArray())[0] = 0xFE;
+ reinterpret_cast<sal_uInt8*>(seq.getArray())[1] = 0xFF;
+
+ m_sEncoding = "utf-16";
+ }
+ else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x3f == pSource[2] && 0x00 == pSource[3] ) {
+ // UTF-16 little endian without byte order mark (this is (strictly speaking) an error.)
+ // The byte order mark is simply added
+
+ seq.realloc( seq.getLength() + 2 );
+ memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 );
+ reinterpret_cast<sal_uInt8*>(seq.getArray())[0] = 0xFF;
+ reinterpret_cast<sal_uInt8*>(seq.getArray())[1] = 0xFE;
+
+ m_sEncoding = "utf-16";
+ }
+ else if( 0xEF == pSource[0] &&
+ 0xBB == pSource[1] &&
+ 0xBF == pSource[2] )
+ {
+ // UTF-8 BOM (byte order mark); signifies utf-8, and not byte order
+ // The BOM is removed.
+ memmove( seq.getArray(), &( seq.getArray()[3] ), seq.getLength()-3 );
+ seq.realloc( seq.getLength() - 3 );
+ m_sEncoding = "utf-8";
+ }
+ else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) {
+ // UCS-4 big endian
+ m_sEncoding = "ucs-4";
+ }
+ else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x00 == pSource[3] ) {
+ // UCS-4 little endian
+ m_sEncoding = "ucs-4";
+ }
+/* TODO: no need to test for the moment since we return sal_False like default case anyway
+ else if( 0x4c == pSource[0] && 0x6f == pSource[1] &&
+ 0xa7 == static_cast<unsigned char> (pSource[2]) &&
+ 0x94 == static_cast<unsigned char> (pSource[3]) ) {
+ // EBCDIC
+ bReturn = sal_False; // must be extended
+ }
+*/
+ else {
+ // other
+ // UTF8 is directly recognized by the parser.
+ bReturn = false;
+ }
+
+ return bReturn;
+}
+
+/// Creates the converter pair used to transcode the input to UTF-8.
+/// Nothing is created when no encoding name has been recorded, or when the
+/// recorded name already resolves to UTF-8.
+void XMLFile2UTFConverter::initializeDecoding()
+{
+    if( m_sEncoding.isEmpty() )
+        return;
+
+    const rtl_TextEncoding eDetected = rtl_getTextEncodingFromMimeCharset( m_sEncoding.getStr() );
+    if( eDetected == RTL_TEXTENCODING_UTF8 )
+        return;
+
+    // bytes in the source encoding -> sal_Unicode -> UTF-8 bytes
+    m_pText2Unicode = std::make_unique<Text2UnicodeConverter>( m_sEncoding );
+    m_pUnicode2Text = std::make_unique<Unicode2TextConverter>( RTL_TEXTENCODING_UTF8 );
+}
+
+
+// Text2UnicodeConverter
+
+
+/// Looks up the text encoding for the MIME charset name @p sEncoding and sets
+/// the converter up for it. An unknown charset leaves the object
+/// uninitialized, with m_bCanContinue cleared.
+Text2UnicodeConverter::Text2UnicodeConverter( const OString &sEncoding )
+    : m_convText2Unicode(nullptr)
+    , m_contextText2Unicode(nullptr)
+{
+    const rtl_TextEncoding eEncoding = rtl_getTextEncodingFromMimeCharset( sEncoding.getStr() );
+    if( eEncoding != RTL_TEXTENCODING_DONTKNOW )
+    {
+        init( eEncoding );
+        return;
+    }
+
+    m_bCanContinue = false;
+    m_bInitialized = false;
+}
+
+/// Releases the conversion context and the converter created by init().
+/// Nothing is released when the constructor could not resolve the encoding
+/// (m_bInitialized is false in that case).
+Text2UnicodeConverter::~Text2UnicodeConverter()
+{
+    if( m_bInitialized )
+    {
+        rtl_destroyTextToUnicodeContext( m_convText2Unicode , m_contextText2Unicode );
+        // The handle was obtained from rtl_createTextToUnicodeConverter(), so
+        // it has to be released through the matching text-to-unicode destroy
+        // function, not the unicode-to-text one.
+        rtl_destroyTextToUnicodeConverter( m_convText2Unicode );
+    }
+}
+
+/// Creates the converter and its conversion context for @p encoding and
+/// flags the object as initialized and able to continue.
+void Text2UnicodeConverter::init( rtl_TextEncoding encoding )
+{
+    m_convText2Unicode = rtl_createTextToUnicodeConverter(encoding);
+    m_contextText2Unicode = rtl_createTextToUnicodeContext( m_convText2Unicode );
+
+    m_bInitialized = true;
+    m_bCanContinue = true;
+}
+
+
+// Converts the bytes in seqText to sal_Unicode, prepending any bytes that a
+// previous call left unconverted in m_seqSource. Bytes the converter reports
+// as an incomplete trailing sequence are stored in m_seqSource for the next
+// call. Returns the converted characters, trimmed to the converted count.
+Sequence<sal_Unicode> Text2UnicodeConverter::convert( const Sequence<sal_Int8> &seqText )
+{
+ sal_uInt32 uiInfo;
+ sal_Size nSrcCvtBytes = 0;
+ sal_Size nTargetCount = 0;
+ sal_Size nSourceCount = 0;
+
+ // the whole source size (left-over bytes plus the new input)
+ sal_Int32 nSourceSize = seqText.getLength() + m_seqSource.getLength();
+ // first guess for the target size: one character per source byte
+ Sequence<sal_Unicode> seqUnicode ( nSourceSize );
+
+ const sal_Int8 *pbSource = seqText.getConstArray();
+ std::unique_ptr<sal_Int8[]> pbTempMem;
+
+ if( m_seqSource.hasElements() ) {
+ // put old rest and new byte sequence into one array
+ pbTempMem.reset(new sal_Int8[ nSourceSize ]);
+ memcpy( pbTempMem.get() , m_seqSource.getConstArray() , m_seqSource.getLength() );
+ memcpy( &(pbTempMem[ m_seqSource.getLength() ]) , seqText.getConstArray() , seqText.getLength() );
+ pbSource = pbTempMem.get();
+
+ // the rest has been consumed: empty the member again
+ m_seqSource = Sequence< sal_Int8 >();
+ }
+
+ while( true ) {
+
+ /* All invalid characters are transformed to the unicode undefined char */
+ nTargetCount += rtl_convertTextToUnicode(
+ m_convText2Unicode,
+ m_contextText2Unicode,
+ reinterpret_cast<const char *>(&( pbSource[nSourceCount] )),
+ nSourceSize - nSourceCount ,
+ &( seqUnicode.getArray()[ nTargetCount ] ),
+ seqUnicode.getLength() - nTargetCount,
+ RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
+ &uiInfo,
+ &nSrcCvtBytes );
+ nSourceCount += nSrcCvtBytes;
+
+ if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL ) {
+ // target buffer exhausted: double it and resume at the current
+ // source/target offsets
+ seqUnicode.realloc( seqUnicode.getLength() * 2 );
+ continue;
+ }
+ break;
+ }
+ if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL ) {
+ // save the unconverted trailing bytes for the next conversion
+ m_seqSource.realloc( nSourceSize - nSourceCount );
+ memcpy( m_seqSource.getArray() , &(pbSource[nSourceCount]) , nSourceSize-nSourceCount );
+ }
+
+ // set to correct unicode size
+ seqUnicode.realloc( nTargetCount );
+
+ return seqUnicode;
+}
+
+
+// Unicode2TextConverter
+
+
+// Creates the unicode-to-text converter for the given target encoding and the
+// conversion context that belongs to it.
+Unicode2TextConverter::Unicode2TextConverter( rtl_TextEncoding encoding )
+{
+ m_convUnicode2Text = rtl_createUnicodeToTextConverter( encoding );
+ m_contextUnicode2Text = rtl_createUnicodeToTextContext( m_convUnicode2Text );
+}
+
+
+// Releases the conversion context first, then the converter it was created for.
+Unicode2TextConverter::~Unicode2TextConverter()
+{
+ rtl_destroyUnicodeToTextContext( m_convUnicode2Text , m_contextUnicode2Text );
+ rtl_destroyUnicodeToTextConverter( m_convUnicode2Text );
+}
+
+
+// Converts nSourceSize characters starting at puSource into bytes of the
+// encoding this converter was created for. Characters left over from the
+// previous call (m_seqSource) are converted first; characters the converter
+// reports as an incomplete trailing unit are stored in m_seqSource for the
+// next call. Returns the produced bytes, trimmed to the produced count.
+Sequence<sal_Int8> Unicode2TextConverter::convert(const sal_Unicode *puSource , sal_Int32 nSourceSize)
+{
+ std::unique_ptr<sal_Unicode[]> puTempMem;
+
+ if( m_seqSource.hasElements() ) {
+ // For surrogates !
+ // put old rest and new byte sequence into one array
+ // In general when surrogates are used, they should be rarely
+ // cut off between two convert()-calls. So this code is used
+ // rarely and the extra copy is acceptable.
+ puTempMem.reset(new sal_Unicode[ nSourceSize + m_seqSource.getLength()]);
+ memcpy( puTempMem.get() ,
+ m_seqSource.getConstArray() ,
+ m_seqSource.getLength() * sizeof( sal_Unicode ) );
+ memcpy(
+ &(puTempMem[ m_seqSource.getLength() ]) ,
+ puSource ,
+ nSourceSize*sizeof( sal_Unicode ) );
+ puSource = puTempMem.get();
+ nSourceSize += m_seqSource.getLength();
+
+ // the rest has been consumed: empty the member again
+ m_seqSource = Sequence< sal_Unicode > ();
+ }
+
+
+ sal_Size nTargetCount = 0;
+ sal_Size nSourceCount = 0;
+
+ sal_uInt32 uiInfo;
+ sal_Size nSrcCvtChars;
+
+ // take nSourceSize * 3 as preference
+ // this is an upper boundary for converting to utf8,
+ // which most often used as the target.
+ sal_Int32 nSeqSize = nSourceSize * 3;
+
+ Sequence<sal_Int8> seqText( nSeqSize );
+ char *pTarget = reinterpret_cast<char *>(seqText.getArray());
+ while( true ) {
+
+ nTargetCount += rtl_convertUnicodeToText(
+ m_convUnicode2Text,
+ m_contextUnicode2Text,
+ &( puSource[nSourceCount] ),
+ nSourceSize - nSourceCount ,
+ &( pTarget[nTargetCount] ),
+ nSeqSize - nTargetCount,
+ RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT |
+ RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT ,
+ &uiInfo,
+ &nSrcCvtChars);
+ nSourceCount += nSrcCvtChars;
+
+ if( uiInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL ) {
+ // target buffer exhausted: grow it, re-fetch the (possibly moved)
+ // array pointer and resume at the current offsets
+ nSeqSize = nSeqSize *2;
+ seqText.realloc( nSeqSize ); // double array size
+ pTarget = reinterpret_cast<char *>(seqText.getArray());
+ continue;
+ }
+ break;
+ }
+
+ // for surrogates: keep the unconverted trailing characters for the next call
+ if( uiInfo & RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL ) {
+ m_seqSource.realloc( nSourceSize - nSourceCount );
+ memcpy( m_seqSource.getArray() ,
+ &(puSource[nSourceCount]),
+ (nSourceSize - nSourceCount) * sizeof( sal_Unicode ) );
+ }
+
+ // reduce the size of the buffer (fast, no copy necessary)
+ seqText.realloc( nTargetCount );
+
+ return seqText;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
new file mode 100644
index 000000000..250078bc5
--- /dev/null
+++ b/sax/source/fastparser/fastparser.cxx
@@ -0,0 +1,1693 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sax/fastparser.hxx>
+#include <sax/fastattribs.hxx>
+#include <utility>
+#include <xml2utf.hxx>
+
+#include <com/sun/star/io/XSeekable.hpp>
+#include <com/sun/star/lang/DisposedException.hpp>
+#include <com/sun/star/lang/IllegalArgumentException.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/xml/sax/FastToken.hpp>
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+#include <com/sun/star/xml/sax/XFastContextHandler.hpp>
+#include <cppuhelper/implbase.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <cppuhelper/exc_hlp.hxx>
+#include <osl/conditn.hxx>
+#include <rtl/ref.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <sal/log.hxx>
+#include <salhelper/thread.hxx>
+#include <tools/diagnose_ex.h>
+#include <o3tl/string_view.hxx>
+
+#include <queue>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <stack>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+#include <cassert>
+#include <cstring>
+#include <libxml/parser.h>
+
+// Inverse of libxml's BAD_CAST.
+#define XML_CAST( str ) reinterpret_cast< const char* >( str )
+
+using namespace ::osl;
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::xml::sax;
+using namespace ::com::sun::star::io;
+using namespace com::sun::star;
+using namespace sax_fastparser;
+
+static void NormalizeURI( OUString& rName );
+
+namespace {
+
+struct Event;
+class FastLocatorImpl;
+struct NamespaceDefine;
+struct Entity;
+
+// Lookup table keyed by OUString with a sal_Int32 value per entry.
+using NamespaceMap = std::unordered_map< OUString, sal_Int32 >;
+
+// A batch of events handled as one unit.
+struct EventList
+{
+ // the events of this batch
+ std::vector<Event> maEvents;
+ // NOTE(review): presumably tells whether the attribute lists held by the
+ // events have been cleared - confirm against the code that sets it
+ bool mbIsAttributesEmpty;
+};
+
+// Kind of callback an Event stands for (stored in Event::maType).
+enum class CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, PROCESSING_INSTRUCTION, DONE, EXCEPTION };
+
+// One recorded parser callback together with the data it carries.
+struct Event
+{
+ // which callback this event stands for
+ CallbackType maType;
+ // token of the element
+ sal_Int32 mnElementToken;
+ // namespace and name of the element
+ OUString msNamespace;
+ OUString msElementName;
+ // attribute list of the element
+ rtl::Reference< FastAttributeList > mxAttributes;
+ // NOTE(review): presumably the namespace declarations found on the element
+ // - confirm against the code that fills it
+ rtl::Reference< FastAttributeList > mxDeclAttributes;
+ // NOTE(review): presumably the text payload of the event - confirm against
+ // the code that fills it
+ OUString msChars;
+};
+
+// A name paired with a token.
+struct NameWithToken
+{
+ OUString msName;
+ sal_Int32 mnToken;
+
+ // takes the name by value and moves it into the member
+ NameWithToken(OUString sName, sal_Int32 nToken) :
+ msName(std::move(sName)), mnToken(nToken) {}
+};
+
+/// Per-element context record: the context handler plus the identity of the
+/// element it belongs to.
+struct SaxContext
+{
+    Reference< XFastContextHandler > mxContext;
+    sal_Int32 mnElementToken;
+    OUString maNamespace;
+    OUString maElementName;
+
+    /// Namespace and name are only stored for elements without a known token
+    /// (FastToken::DONTKNOW); for all others both strings stay empty.
+    SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ):
+        mnElementToken(nElementToken)
+    {
+        if (nElementToken != FastToken::DONTKNOW)
+            return;
+
+        maNamespace = aNamespace;
+        maElementName = aElementName;
+    }
+};
+
+// The handlers a parser run works with.
+struct ParserData
+{
+ css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler;
+ rtl::Reference<FastTokenHandlerBase> mxTokenHandler;
+ css::uno::Reference< css::xml::sax::XErrorHandler > mxErrorHandler;
+ css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler;
+
+ ParserData();
+};
+
+// A namespace prefix together with a token and a namespace URL.
+struct NamespaceDefine
+{
+ OString maPrefix;
+ sal_Int32 mnToken;
+ OUString maNamespaceURL;
+
+ // prefix and URL are taken by value and moved into the members
+ NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL )
+ : maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {}
+ // default: empty prefix and URL, token -1
+ NamespaceDefine() : mnToken(-1) {}
+};
+
+// Entity binds all information needed for a single file | single call of parseStream.
+// It inherits the handler configuration from ParserData.
+struct Entity : public ParserData
+{
+ // Amount of work producer sends to consumer in one iteration:
+ static const size_t mnEventListSize = 1000;
+
+ // unique for each Entity instance:
+
+ // Number of valid events in mxProducedEvents:
+ size_t mnProducedEventsSize;
+ // Event list currently being filled (empty optional: none in progress)
+ std::optional<EventList> mxProducedEvents;
+ // NOTE(review): presumably the lists handed over for consumption - confirm
+ std::queue<EventList> maPendingEvents;
+ // Lists that can be recycled by getEventList()
+ std::queue<EventList> maUsedEvents;
+ // Guards the event queues
+ std::mutex maEventProtector;
+
+ // NOTE(review): presumably queue-length thresholds for resuming/pausing
+ // the producer - confirm against the code that reads them
+ static const size_t mnEventLowWater = 4;
+ static const size_t mnEventHighWater = 8;
+ osl::Condition maConsumeResume;
+ osl::Condition maProduceResume;
+ // Event we use to store data if threading is disabled:
+ Event maSharedEvent;
+
+ // copied in copy constructor:
+ // (note: the copy constructor is declared deleted further down)
+
+ // Allow to disable threading for small documents:
+ bool mbEnableThreads;
+ css::xml::sax::InputSource maStructSource;
+ // libxml2 parser context of this entity
+ xmlParserCtxtPtr mpParser;
+ ::sax_expatwrap::XMLFile2UTFConverter maConverter;
+
+ // Exceptions cannot be thrown through the C-XmlParser (possible
+ // resource leaks), therefore any exception thrown by a UNO callback
+ // must be saved somewhere until the C-XmlParser is stopped.
+ css::uno::Any maSavedException;
+ // Guards maSavedException
+ std::mutex maSavedExceptionMutex;
+ void saveException( const Any & e );
+ // Thread-safe check if maSavedException has value
+ bool hasException();
+ void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
+ bool mbDuringParse );
+
+ std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack;
+ /* Context for main thread consuming events.
+ * startElement() stores the data, which characters() and endElement() uses
+ */
+ std::stack< SaxContext, std::vector<SaxContext> > maContextStack;
+ // Determines which elements of maNamespaceDefines are valid in current context
+ std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount;
+ std::vector< NamespaceDefine > maNamespaceDefines;
+
+ explicit Entity( const ParserData& rData );
+ // not copyable
+ Entity( const Entity& rEntity ) = delete;
+ Entity& operator=( const Entity& rEntity ) = delete;
+ void startElement( Event const *pEvent );
+ void characters( const OUString& sChars );
+ void endElement();
+ void processingInstruction( const OUString& rTarget, const OUString& rData );
+ EventList& getEventList();
+ Event& getEvent( CallbackType aType );
+};
+
+// Stuff for custom entity names: a name and the replacement text for it.
+struct ReplacementPair
+{
+ OUString name;
+ OUString replacement;
+};
+/// Orders two replacement pairs by their names.
+inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
+{
+    return lhs.name.compareTo(rhs.name) < 0;
+}
+/// Orders a replacement pair against a plain ASCII name.
+inline bool operator<(const ReplacementPair& lhs, const char* rhs)
+{
+    const sal_Int32 nOrder = lhs.name.compareToAscii(rhs);
+    return nOrder < 0;
+}
+
+} // namespace
+
+namespace sax_fastparser {
+
+// Implementation class of the fast SAX parser: holds the configured handlers,
+// the registered namespaces and the stack of entities being parsed, and
+// receives the callbacks of the underlying XML parser.
+class FastSaxParserImpl
+{
+public:
+ explicit FastSaxParserImpl();
+ ~FastSaxParserImpl();
+
+private:
+ std::vector<ReplacementPair> m_Replacements;
+ // entities that are unlinked and freed in the destructor
+ std::vector<xmlEntityPtr> m_TemporalEntities;
+
+public:
+ // XFastParser
+ /// @throws css::xml::sax::SAXException
+ /// @throws css::io::IOException
+ /// @throws css::uno::RuntimeException
+ void parseStream( const css::xml::sax::InputSource& aInputSource );
+ /// @throws css::uno::RuntimeException
+ void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler );
+ /// @throws css::uno::RuntimeException
+ void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler );
+ /// @throws css::lang::IllegalArgumentException
+ /// @throws css::uno::RuntimeException
+ void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken );
+ /// @throws css::lang::IllegalArgumentException
+ /// @throws css::uno::RuntimeException
+ OUString const & getNamespaceURL( std::u16string_view rPrefix );
+ /// @throws css::uno::RuntimeException
+ void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler );
+ /// @throws css::uno::RuntimeException
+ void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler);
+ // Fake DTD file
+ void setCustomEntityNames(
+ const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements);
+
+ // called by the C callbacks of the libxml2 parser
+ void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
+ int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes );
+ void callbackEndElement();
+ void callbackCharacters( const xmlChar* s, int nLen );
+ void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data );
+ xmlEntityPtr callbackGetEntity( const xmlChar *name );
+
+ void pushEntity(const ParserData&, xml::sax::InputSource const&);
+ void popEntity();
+ // current entity; mpTop must point to a valid entity when this is called
+ Entity& getEntity() { return *mpTop; }
+ void parse();
+ void produce( bool bForceFlush = false );
+ // when set, GetTokenWithPrefix() does not throw for an undeclared prefix
+ bool m_bIgnoreMissingNSDecl;
+ // when set, parseStream() never enables the threaded parser
+ bool m_bDisableThreadedParser;
+
+private:
+ bool consume(EventList&);
+ void deleteUsedEvents();
+ void sendPendingCharacters();
+ void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes);
+
+ sal_Int32 GetToken( const xmlChar* pName, sal_Int32 nameLen );
+ /// @throws css::xml::sax::SAXException
+ sal_Int32 GetTokenWithPrefix( const xmlChar* pPrefix, int prefixLen, const xmlChar* pName, int nameLen );
+ /// @throws css::xml::sax::SAXException
+ OUString const & GetNamespaceURL( std::string_view rPrefix );
+ sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL );
+ sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName, int nNameLen );
+ void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL );
+
+private:
+ std::mutex maMutex; ///< Protecting whole parseStream() execution
+ ::rtl::Reference< FastLocatorImpl > mxDocumentLocator;
+ NamespaceMap maNamespaceMap; ///< Namespace URL -> namespace token
+
+ ParserData maData; ///< Cached parser configuration for next call of parseStream().
+
+ Entity *mpTop; ///< std::stack::top() is amazingly slow => cache this.
+ std::stack< Entity > maEntities; ///< Entity stack for each call of parseStream().
+ std::vector<char> pendingCharacters; ///< Data from characters() callback that needs to be sent.
+};
+
+} // namespace sax_fastparser
+
+namespace {
+
+/// Thread named "Parser" that runs FastSaxParserImpl::parse() for one parser.
+class ParserThread: public salhelper::Thread
+{
+    FastSaxParserImpl *mpParser;
+public:
+    explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {}
+private:
+    /// Runs the parse; any exception escaping it is turned into an
+    /// EXCEPTION event that is flushed out immediately.
+    virtual void execute() override
+    {
+        try
+        {
+            mpParser->parse();
+        }
+        catch (...)
+        {
+            mpParser->getEntity().getEvent( CallbackType::EXCEPTION );
+            mpParser->produce( true );
+        }
+    }
+};
+
+// C trampolines registered with the XML parser: each one recovers the
+// FastSaxParserImpl from the opaque user-data pointer and forwards the call.
+extern "C" {
+
+static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
+    int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes)
+{
+    static_cast<FastSaxParserImpl*>( userData )->callbackStartElement(
+        localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes );
+}
+
+static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/)
+{
+    static_cast<FastSaxParserImpl*>( userData )->callbackEndElement();
+}
+
+static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen )
+{
+    static_cast<FastSaxParserImpl*>( userData )->callbackCharacters( s, nLen );
+}
+
+static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data )
+{
+    static_cast<FastSaxParserImpl*>( userData )->callbackProcessingInstruction( target, data );
+}
+
+static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name)
+{
+    return static_cast<FastSaxParserImpl*>( userData )->callbackGetEntity( name );
+}
+
+}
+
+// XLocator implementation that answers position and id queries from the
+// current entity of the parser it points to. After dispose() every query
+// throws DisposedException.
+class FastLocatorImpl : public WeakImplHelper< XLocator >
+{
+public:
+ explicit FastLocatorImpl(FastSaxParserImpl *p) : mpParser(p) {}
+
+ // detaches the locator from the parser
+ void dispose() { mpParser = nullptr; }
+ /// @throws RuntimeException
+ void checkDispose() const { if( !mpParser ) throw DisposedException(); }
+
+ //XLocator
+ virtual sal_Int32 SAL_CALL getColumnNumber() override;
+ virtual sal_Int32 SAL_CALL getLineNumber() override;
+ virtual OUString SAL_CALL getPublicId() override;
+ virtual OUString SAL_CALL getSystemId() override;
+
+private:
+ // parser queried by the getters; nullptr once disposed (not owned)
+ FastSaxParserImpl *mpParser;
+};
+
+/// Column reported by libxml2 for the current entity's parser context.
+/// @throws DisposedException once the locator has been disposed
+sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber()
+{
+    checkDispose();
+    Entity& rCurrent = mpParser->getEntity();
+    return xmlSAX2GetColumnNumber( rCurrent.mpParser );
+}
+
+/// Line reported by libxml2 for the current entity's parser context.
+/// @throws DisposedException once the locator has been disposed
+sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber()
+{
+    checkDispose();
+    Entity& rCurrent = mpParser->getEntity();
+    return xmlSAX2GetLineNumber( rCurrent.mpParser );
+}
+
+/// Public id of the input source of the current entity.
+/// @throws DisposedException once the locator has been disposed
+OUString SAL_CALL FastLocatorImpl::getPublicId()
+{
+    checkDispose();
+    const InputSource& rSource = mpParser->getEntity().maStructSource;
+    return rSource.sPublicId;
+}
+
+/// System id of the input source of the current entity.
+/// @throws DisposedException once the locator has been disposed
+OUString SAL_CALL FastLocatorImpl::getSystemId()
+{
+    checkDispose();
+    const InputSource& rSource = mpParser->getEntity().maStructSource;
+    return rSource.sSystemId;
+}
+
+// All handler references start out empty.
+ParserData::ParserData() = default;
+
+/// Starts from a copy of the handler configuration in @p rData: no events
+/// produced yet, threading switched off and no parser context.
+Entity::Entity(const ParserData& rData) :
+    ParserData(rData),
+    mnProducedEventsSize(0),
+    mbEnableThreads(false),
+    mpParser(nullptr)
+{
+}
+
+// Handles a start-element event: pushes a SaxContext for the element, asks
+// the parent context (or, at the root, the document handler) for a child
+// context, calls its start method and stores it in the pushed SaxContext.
+// Exceptions thrown by the handlers are saved via saveException(), not
+// propagated.
+void Entity::startElement( Event const *pEvent )
+{
+ const sal_Int32& nElementToken = pEvent->mnElementToken;
+ const OUString& aNamespace = pEvent->msNamespace;
+ const OUString& aElementName = pEvent->msElementName;
+
+ // Use un-wrapped pointers to avoid significant acquire/release overhead
+ XFastContextHandler *pParentContext = nullptr;
+ if( !maContextStack.empty() )
+ {
+ pParentContext = maContextStack.top().mxContext.get();
+ if( !pParentContext )
+ {
+ // the parent has no context handler: only record the element so
+ // that endElement() finds a matching stack entry, call nothing
+ maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) );
+ return;
+ }
+ }
+
+ maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) );
+
+ try
+ {
+ Reference< XFastAttributeList > xAttr( pEvent->mxAttributes );
+ Reference< XFastContextHandler > xContext;
+
+ if ( mxNamespaceHandler.is() )
+ {
+ // forward every entry of the declaration attributes as a
+ // (name, value) pair to the namespace handler
+ const Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
+ for (const auto& rNSDeclAttrib : NSDeclAttribs)
+ {
+ mxNamespaceHandler->registerNamespace( rNSDeclAttrib.Name, rNSDeclAttrib.Value );
+ }
+ }
+
+ if( nElementToken == FastToken::DONTKNOW )
+ {
+ // element without a token: use the "unknown" callbacks, which
+ // identify the element by namespace and name
+ if( pParentContext )
+ xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
+ else if( mxDocumentHandler.is() )
+ xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
+
+ if( xContext.is() )
+ {
+ xContext->startUnknownElement( aNamespace, aElementName, xAttr );
+ }
+ }
+ else
+ {
+ // element with a token: use the "fast" callbacks
+ if( pParentContext )
+ xContext = pParentContext->createFastChildContext( nElementToken, xAttr );
+ else if( mxDocumentHandler.is() )
+ xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
+
+ if( xContext.is() )
+ xContext->startFastElement( nElementToken, xAttr );
+ }
+ // move the reference we own into the stack entry to avoid referencing thrash.
+ maContextStack.top().mxContext = std::move( xContext );
+ }
+ catch (...)
+ {
+ saveException( ::cppu::getCaughtException() );
+ }
+}
+
+/// Passes character data to the context handler of the innermost open
+/// element, if there is one. Exceptions thrown by the handler are saved via
+/// saveException(), not propagated.
+void Entity::characters( const OUString& sChars )
+{
+    // no open element at all: malformed XML stream !?
+    if (maContextStack.empty())
+        return;
+
+    XFastContextHandler * pHandler = maContextStack.top().mxContext.get();
+    if( !pHandler )
+        return;
+
+    try
+    {
+        pHandler->characters( sChars );
+    }
+    catch (...)
+    {
+        saveException( ::cppu::getCaughtException() );
+    }
+}
+
+/// Notifies the context handler of the innermost open element that the
+/// element ends, then pops that element from the context stack. Exceptions
+/// thrown by the handler are saved via saveException(), not propagated.
+void Entity::endElement()
+{
+    // no open element at all: malformed XML stream !?
+    if (maContextStack.empty())
+        return;
+
+    const SaxContext& rTop = maContextStack.top();
+    if( XFastContextHandler* pHandler = rTop.mxContext.get() )
+    {
+        try
+        {
+            if( rTop.mnElementToken == FastToken::DONTKNOW )
+                pHandler->endUnknownElement( rTop.maNamespace, rTop.maElementName );
+            else
+                pHandler->endFastElement( rTop.mnElementToken );
+        }
+        catch (...)
+        {
+            saveException( ::cppu::getCaughtException() );
+        }
+    }
+    maContextStack.pop();
+}
+
+/// Forwards a processing instruction to the document handler, if one is set.
+/// Exceptions thrown by the handler are saved via saveException(), not
+/// propagated.
+void Entity::processingInstruction( const OUString& rTarget, const OUString& rData )
+{
+    if( !mxDocumentHandler.is() )
+        return;
+
+    try
+    {
+        mxDocumentHandler->processingInstruction( rTarget, rData );
+    }
+    catch (...)
+    {
+        saveException( ::cppu::getCaughtException() );
+    }
+}
+
+/// Returns the event list currently being filled. If there is none, a used
+/// list is recycled from maUsedEvents when available, otherwise a new list
+/// with mnEventListSize entries is created; in both cases the count of
+/// produced events restarts at zero.
+EventList& Entity::getEventList()
+{
+    if (mxProducedEvents)
+        return *mxProducedEvents;
+
+    std::unique_lock aGuard(maEventProtector);
+    if (maUsedEvents.empty())
+    {
+        // nothing to recycle; the lock stays held while the list is built
+        mxProducedEvents.emplace();
+        mxProducedEvents->maEvents.resize(mnEventListSize);
+        mxProducedEvents->mbIsAttributesEmpty = false;
+    }
+    else
+    {
+        mxProducedEvents = std::move(maUsedEvents.front());
+        maUsedEvents.pop();
+        aGuard.unlock(); // queue no longer touched
+    }
+    mnProducedEventsSize = 0;
+    return *mxProducedEvents;
+}
+
+/// Returns the event slot the next callback should fill in.
+/// Without threads this is always maSharedEvent (its maType is not touched
+/// here). With threads it is the next free entry of the current event list,
+/// already tagged with @p aType; the list grows by one entry when it is full.
+Event& Entity::getEvent( CallbackType aType )
+{
+    if (!mbEnableThreads)
+        return maSharedEvent;
+
+    EventList& rEventList = getEventList();
+    if (mnProducedEventsSize == rEventList.maEvents.size())
+    {
+        // maSavedException is written under maSavedExceptionMutex by
+        // saveException(), so it is queried through the locking accessor
+        // instead of being read directly.
+        SAL_WARN_IF(!hasException(), "sax",
+            "Event vector should only exceed " << mnEventListSize <<
+            " temporarily while an exception is pending");
+        rEventList.maEvents.resize(mnProducedEventsSize + 1);
+    }
+    Event& rEvent = rEventList.maEvents[mnProducedEventsSize++];
+    rEvent.maType = aType;
+    return rEvent;
+}
+
+/// Builds "[<system id> line <n>]: <message>" from the last error recorded in
+/// @p ctxt; "unknown error" is used when libxml2 has no message.
+OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine )
+{
+    xmlErrorPtr pLastError = xmlCtxtGetLastError( ctxt );
+    const char* pMessage = ( pLastError && pLastError->message )
+        ? pLastError->message
+        : "unknown error";
+    return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " +
+        OUString(pMessage, strlen(pMessage), RTL_TEXTENCODING_ASCII_US);
+}
+
+// Always throws a SAXParseException describing the current parser error and
+// position, wrapping the saved exception if there is one. The error handler
+// callback is skipped when called during a threaded produce.
+void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator,
+ bool mbDuringParse )
+{
+ // Error during parsing !
+ Any savedException;
+ {
+ // take a snapshot of the saved exception under its mutex
+ std::scoped_lock g(maSavedExceptionMutex);
+ if (maSavedException.hasValue())
+ {
+ savedException.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get());
+ }
+ }
+ SAXParseException aExcept(
+ lclGetErrorMessage( mpParser,
+ xDocumentLocator->getSystemId(),
+ xDocumentLocator->getLineNumber() ),
+ Reference< XInterface >(),
+ savedException,
+ xDocumentLocator->getPublicId(),
+ xDocumentLocator->getSystemId(),
+ xDocumentLocator->getLineNumber(),
+ xDocumentLocator->getColumnNumber()
+ );
+
+ // if an error handler is set, it may throw the exception itself
+ if( !mbDuringParse || !mbEnableThreads )
+ {
+ if (mxErrorHandler.is() )
+ mxErrorHandler->fatalError( Any( aExcept ) );
+ }
+
+ // error handler has not thrown, but parsing must stop => throw ourselves
+ throw aExcept;
+}
+
+// In the single threaded case we emit events via our C
+// callbacks, so any exception caught must be queued up until
+// we can safely re-throw it from our C++ parent of parse()
+
+// If multi-threaded, we need to push an EXCEPTION event, at
+// which point we transfer ownership of maSavedException to
+// the consuming thread.
+/// Remembers @p e as the saved exception unless one is already stored; only
+/// the first exception is kept, later ones are logged and dropped.
+void Entity::saveException( const Any & e )
+{
+    // fdo#81214 - allow the parser to run on after an exception,
+    // unexpectedly some 'startElements' produce a UNO_QUERY_THROW
+    // for XComponent; and yet expect to continue parsing.
+    SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e));
+    std::scoped_lock g(maSavedExceptionMutex);
+    if (!maSavedException.hasValue())
+    {
+        maSavedException = e;
+        return;
+    }
+    SAL_INFO("sax.fastparser", "discarding exception, already have one");
+}
+
+/// Tells, under maSavedExceptionMutex, whether an exception has been saved.
+bool Entity::hasException()
+{
+    std::scoped_lock aLock(maSavedExceptionMutex);
+    const bool bSaved = maSavedException.hasValue();
+    return bSaved;
+}
+
+} // namespace
+
+namespace sax_fastparser {
+
+/// Starts with both option flags off and no current entity, and creates the
+/// document locator bound to this parser.
+FastSaxParserImpl::FastSaxParserImpl()
+    : m_bIgnoreMissingNSDecl(false)
+    , m_bDisableThreadedParser(false)
+    , mpTop(nullptr)
+{
+    mxDocumentLocator = new FastLocatorImpl( this );
+}
+
+/// Detaches the document locator from this parser and frees every non-null
+/// entry of m_TemporalEntities.
+FastSaxParserImpl::~FastSaxParserImpl()
+{
+    if( mxDocumentLocator.is() )
+        mxDocumentLocator->dispose();
+
+    for (xmlEntityPtr pEntity : m_TemporalEntities)
+    {
+        if (pEntity)
+        {
+            // the entity is released through the generic node API
+            xmlNodePtr pNode = reinterpret_cast<xmlNodePtr>(pEntity);
+            xmlUnlinkNode(pNode);
+            xmlFreeNode(pNode);
+        }
+    }
+}
+
+/// Records a prefix -> namespace binding in the next free slot of the
+/// current entity's namespace definitions and bumps the count of valid
+/// definitions for the current context. The vector grows in steps of 64.
+void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL )
+{
+    Entity& rEntity = getEntity();
+    assert(!rEntity.maNamespaceCount.empty()); // need a context!
+
+    const sal_uInt32 nSlot = rEntity.maNamespaceCount.top()++;
+    std::vector< NamespaceDefine >& rDefines = rEntity.maNamespaceDefines;
+    if( nSlot >= rDefines.size() )
+        rDefines.resize( rDefines.size() + 64 );
+
+    rDefines[nSlot] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL );
+}
+
+/// Asks the current entity's token handler for the token of the name
+/// @p pName with length @p nameLen.
+sal_Int32 FastSaxParserImpl::GetToken( const xmlChar* pName, sal_Int32 nameLen )
+{
+    const auto pTokenHandler = getEntity().mxTokenHandler.get();
+    // uses utf-8
+    return FastTokenHandlerBase::getTokenFromChars( pTokenHandler, XML_CAST( pName ), nameLen );
+}
+
+/// Resolves @p pPrefix against the namespace definitions valid in the current
+/// context (latest definition first) and combines the namespace token with
+/// the token of @p pName.
+/// @returns the combined token, or FastToken::DONTKNOW when the prefix, its
+///          namespace token or the name token is unknown
+/// @throws css::xml::sax::SAXException when no definition matches the prefix
+///         and m_bIgnoreMissingNSDecl is not set
+sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, int nPrefixLen, const xmlChar* pName, int nNameLen )
+{
+    Entity& rEntity = getEntity();
+    if (rEntity.maNamespaceCount.empty())
+        return FastToken::DONTKNOW;
+
+    sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
+    for( sal_uInt32 nIdx = rEntity.maNamespaceCount.top(); nIdx-- > 0; )
+    {
+        const NamespaceDefine& rDefine = rEntity.maNamespaceDefines[nIdx];
+        const OString& rPrefix( rDefine.maPrefix );
+        const bool bSamePrefix = (rPrefix.getLength() == nPrefixLen) &&
+            rtl_str_reverseCompare_WithLength(rPrefix.pData->buffer, rPrefix.pData->length, XML_CAST( pPrefix ), nPrefixLen ) == 0;
+        if( bSamePrefix )
+        {
+            nNamespaceToken = rDefine.mnToken;
+            break;
+        }
+
+        // oldest definition reached without a match
+        if( nIdx == 0 && !m_bIgnoreMissingNSDecl )
+            throw SAXException("No namespace defined for " + OUString(XML_CAST(pPrefix),
+                    nPrefixLen, RTL_TEXTENCODING_UTF8), Reference< XInterface >(), Any());
+    }
+
+    return GetTokenWithContextNamespace( nNamespaceToken, pName, nNameLen );
+}
+
+/// Looks @p rNamespaceURL up in maNamespaceMap.
+/// @returns the mapped token, or FastToken::DONTKNOW when the URL is absent
+sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL )
+{
+    const auto aFound = maNamespaceMap.find( rNamespaceURL );
+    if( aFound == maNamespaceMap.end() )
+        return FastToken::DONTKNOW;
+    return aFound->second;
+}
+
+/// Finds the namespace URL bound to @p rPrefix among the definitions valid in
+/// the current context, latest definition first.
+/// @throws css::xml::sax::SAXException when the prefix is not defined
+OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix )
+{
+    Entity& rEntity = getEntity();
+    if( !rEntity.maNamespaceCount.empty() )
+    {
+        for( sal_uInt32 nIdx = rEntity.maNamespaceCount.top(); nIdx-- > 0; )
+        {
+            const NamespaceDefine& rDefine = rEntity.maNamespaceDefines[nIdx];
+            if( rDefine.maPrefix == rPrefix )
+                return rDefine.maNamespaceURL;
+        }
+    }
+
+    throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix),
+            Reference< XInterface >(), Any());
+}
+
+/// Combines an already resolved namespace token with the token of @p pName.
+/// @returns the OR of both tokens, or FastToken::DONTKNOW when either one is
+///          unknown
+sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName, int nNameLen )
+{
+    if( nNamespaceToken == FastToken::DONTKNOW )
+        return FastToken::DONTKNOW;
+
+    const sal_Int32 nNameToken = GetToken( pName, nNameLen );
+    if( nNameToken == FastToken::DONTKNOW )
+        return FastToken::DONTKNOW;
+
+    return nNamespaceToken | nNameToken;
+}
+
+namespace
+{
+    /// Scope guard for one parseStream() run: on destruction it joins the
+    /// parser thread (if one was registered), frees the libxml2 parser
+    /// context and document of the entity, and pops the entity from the
+    /// parser.
+    class ParserCleanup
+    {
+    private:
+        FastSaxParserImpl& m_rParser;
+        Entity& m_rEntity;
+        rtl::Reference<ParserThread> m_xParser;
+    public:
+        ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity)
+            : m_rParser(rParser)
+            , m_rEntity(rEntity)
+        {
+        }
+        ~ParserCleanup()
+        {
+            // The parser thread runs FastSaxParserImpl::parse() on this
+            // entity, so wait for it before the parser context is freed.
+            joinThread();
+            if (m_rEntity.mpParser)
+            {
+                if (m_rEntity.mpParser->myDoc)
+                    xmlFreeDoc(m_rEntity.mpParser->myDoc);
+                xmlFreeParserCtxt(m_rEntity.mpParser);
+            }
+            m_rParser.popEntity();
+        }
+        /// Registers the thread that has to be joined during cleanup.
+        void setThread(const rtl::Reference<ParserThread> &xParser)
+        {
+            m_xParser = xParser;
+        }
+        /// Joins the registered thread, if any; later calls are no-ops
+        /// because the reference is cleared before joining.
+        void joinThread()
+        {
+            if (m_xParser.is())
+            {
+                rtl::Reference<ParserThread> xToJoin = m_xParser;
+                m_xParser.clear();
+                xToJoin->join();
+            }
+        }
+    };
+}
+/***************
+*
+* parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
+* the file-specific initialization work. (During a parser run, external files may be opened)
+*
+* Steps, as implemented below:
+*  - maMutex is held for the whole call;
+*  - a new Entity is pushed for rStructSource (pushEntity() throws a SAXException when the
+*    source has no input stream) and a ParserCleanup guard is created for it;
+*  - the document handler, if set, gets setDocumentLocator() and startDocument();
+*  - unless EMSCRIPTEN is defined, the SAX_DISABLE_THREADS environment variable is set or
+*    m_bDisableThreadedParser is true, threading is enabled for inputs whose seekable length
+*    or available() byte count exceeds 10000;
+*  - threaded mode: a ParserThread is launched and this thread consumes the event lists it
+*    queues until consume() returns false, then joins the thread, deletes used events and
+*    rethrows a saved exception, if there is one;
+*  - non-threaded mode: parse() is called directly on this thread;
+*  - finally the document handler, if set, gets endDocument().
+*
+****************/
+void FastSaxParserImpl::parseStream(const InputSource& rStructSource)
+{
+ xmlInitParser();
+
+ // Only one text at one time
+ std::unique_lock guard( maMutex );
+
+ pushEntity(maData, rStructSource);
+ Entity& rEntity = getEntity();
+ ParserCleanup aEnsureFree(*this, rEntity); // frees the libxml2 context, joins the thread and pops the entity on scope exit
+
+ // start the document
+ if( rEntity.mxDocumentHandler.is() )
+ {
+ rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator );
+ rEntity.mxDocumentHandler->startDocument();
+ }
+
+#ifdef EMSCRIPTEN
+ rEntity.mbEnableThreads = false;
+#else
+ if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser)
+ {
+ Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY);
+ // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams
+ rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000)
+ || (rEntity.maStructSource.aInputStream->available() > 10000);
+ }
+#endif
+
+ if (rEntity.mbEnableThreads)
+ {
+ rtl::Reference<ParserThread> xParser = new ParserThread(this);
+ xParser->launch();
+ aEnsureFree.setThread(xParser); // from here on the guard joins the thread if we leave early
+ bool done = false;
+ do {
+ rEntity.maConsumeResume.wait(); // produce() sets this after queueing an event list
+ rEntity.maConsumeResume.reset();
+
+ std::unique_lock aGuard(rEntity.maEventProtector);
+ while (!rEntity.maPendingEvents.empty())
+ {
+ if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater)
+ rEntity.maProduceResume.set(); // start producer again
+
+ EventList aEventList = std::move(rEntity.maPendingEvents.front());
+ rEntity.maPendingEvents.pop();
+ aGuard.unlock(); // unlock
+
+ if (!consume(aEventList)) // consume() returns false for a DONE event
+ done = true;
+
+ aGuard.lock(); // lock
+
+ if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater )
+ {
+ aGuard.unlock(); // the attribute lists are cleared without holding the event lock
+ for (auto& rEvent : aEventList.maEvents)
+ {
+ if (rEvent.mxAttributes.is())
+ {
+ rEvent.mxAttributes->clear();
+ if( rEntity.mxNamespaceHandler.is() )
+ rEvent.mxDeclAttributes->clear();
+ }
+ aEventList.mbIsAttributesEmpty = true; // callbackStartElement() reads this flag to skip its own clear()
+ }
+ aGuard.lock();
+ }
+
+ rEntity.maUsedEvents.push(std::move(aEventList)); // park the processed list; deleteUsedEvents() destroys what is left here
+ }
+ } while (!done);
+ aEnsureFree.joinThread();
+ deleteUsedEvents();
+
+ // callbacks used inside XML_Parse may have caught an exception
+ // No need to lock maSavedExceptionMutex here because parser
+ // thread is joined.
+ if( rEntity.maSavedException.hasValue() )
+ rEntity.throwException( mxDocumentLocator, true );
+ }
+ else
+ {
+ parse(); // single-threaded: the callbacks deliver events to the entity directly
+ }
+
+ // finish document
+ if( rEntity.mxDocumentHandler.is() )
+ {
+ rEntity.mxDocumentHandler->endDocument();
+ }
+}
+
+/// Stores the document handler in maData, the ParserData that pushEntity() receives.
+void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
+{
+    maData.mxDocumentHandler = Handler;
+}
+
+/** Stores the token handler in maData.
+ *
+ *  The handler is kept as a FastTokenHandlerBase; a handler of any other
+ *  type trips the assertion in debug builds and is stored as null otherwise.
+ */
+void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
+{
+    FastTokenHandlerBase* const pTokenBase = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
+    assert( pTokenBase && "we expect this handler to be a subclass of FastTokenHandlerBase" );
+    maData.mxTokenHandler = pTokenBase;
+}
+
+/** Registers a namespace URL under the given token.
+ *
+ *  @param NamespaceURL    URL to register
+ *  @param NamespaceToken  token for the URL; must not be below FastToken::NAMESPACE
+ *  @throws IllegalArgumentException if the token is too small or the URL is already registered
+ */
+void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
+{
+    if( NamespaceToken < FastToken::NAMESPACE )
+        throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0);
+
+    const bool bAlreadyKnown = GetNamespaceToken( NamespaceURL ) != FastToken::DONTKNOW;
+    if( bAlreadyKnown )
+        throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0);
+
+    maNamespaceMap[ NamespaceURL ] = NamespaceToken;
+}
+
+/** UTF-16 front end for GetNamespaceURL().
+ *
+ *  @param rPrefix  prefix to resolve; converted to UTF-8 for the lookup
+ *  @return the namespace URL currently bound to the prefix
+ *  @throws IllegalArgumentException if the lookup fails with any UNO Exception
+ */
+OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix )
+{
+    const OString aUtf8Prefix( OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 ) );
+    try
+    {
+        return GetNamespaceURL( aUtf8Prefix );
+    }
+    catch (const Exception&)
+    {
+        // reported to the caller as IllegalArgumentException below
+    }
+    throw IllegalArgumentException();
+}
+
+void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
+{
+ maData.mxErrorHandler = Handler;
+}
+
+/// Stores the namespace handler in maData, the ParserData that pushEntity() receives.
+void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
+{
+    maData.mxNamespaceHandler = Handler;
+}
+
+/** Replaces the custom entity table.
+ *
+ *  @param replacements  pairs of (entity name, replacement text)
+ *
+ *  The table is kept sorted because callbackGetEntity() searches it with
+ *  std::lower_bound.
+ */
+void FastSaxParserImpl::setCustomEntityNames(
+    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
+{
+    const size_t nCount = replacements.size();
+    m_Replacements.resize(nCount);
+
+    size_t nSlot = 0;
+    for (const auto& rPair : replacements)
+    {
+        auto& rTarget = m_Replacements[nSlot++];
+        rTarget.name = rPair.First;
+        rTarget.replacement = rPair.Second;
+    }
+
+    // zero or one entry is trivially sorted
+    if (nCount > 1)
+        std::sort(m_Replacements.begin(), m_Replacements.end());
+}
+
+/** Destroys every event list parked in maUsedEvents of the current entity.
+ *
+ *  Each list is moved out of the queue while maEventProtector is held, but
+ *  its destructor runs after the lock has been released.
+ */
+void FastSaxParserImpl::deleteUsedEvents()
+{
+    Entity& rEntity = getEntity();
+    std::unique_lock aQueueLock(rEntity.maEventProtector);
+
+    while (!rEntity.maUsedEvents.empty())
+    {
+        {
+            EventList aDoomed( std::move(rEntity.maUsedEvents.front()) );
+            rEntity.maUsedEvents.pop();
+
+            aQueueLock.unlock();
+        } // aDoomed is destroyed here, with the lock released
+
+        aQueueLock.lock();
+    }
+}
+
+/** Hands the event list that is currently being filled over to the consumer.
+ *
+ *  Nothing happens unless bForceFlush is set or the number of produced
+ *  events has reached Entity::mnEventListSize. While the pending queue holds
+ *  Entity::mnEventHighWater lists or more, the caller waits on maProduceResume.
+ *
+ *  @param bForceFlush  queue the list even if it is not full yet
+ */
+void FastSaxParserImpl::produce( bool bForceFlush )
+{
+    Entity& rEntity = getEntity();
+    const bool bListFull = rEntity.mnProducedEventsSize >= Entity::mnEventListSize;
+    if (!bForceFlush && !bListFull)
+        return;
+
+    std::unique_lock aQueueLock(rEntity.maEventProtector);
+
+    // throttle: pause parsing until the consumer signals maProduceResume
+    while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
+    {
+        aQueueLock.unlock();
+        rEntity.maProduceResume.wait();
+        rEntity.maProduceResume.reset();
+        aQueueLock.lock();
+    }
+
+    rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents));
+    rEntity.mxProducedEvents.reset();
+    assert(!rEntity.mxProducedEvents);
+
+    aQueueLock.unlock();
+
+    // wake the consumer
+    rEntity.maConsumeResume.set();
+}
+
+/** Replays the events of one event list on the current entity.
+ *
+ *  @param rEventList  list to process; its mbIsAttributesEmpty flag is reset first
+ *  @return false when a DONE event (or an unexpected event type) was reached,
+ *          true when the whole list was processed without one
+ *
+ *  An EXCEPTION event makes the entity throw the saved exception.
+ */
+bool FastSaxParserImpl::consume(EventList& rEventList)
+{
+    Entity& rEntity = getEntity();
+    rEventList.mbIsAttributesEmpty = false;
+
+    for (auto& rCurrent : rEventList.maEvents)
+    {
+        switch (rCurrent.maType)
+        {
+            case CallbackType::START_ELEMENT:
+                rEntity.startElement( &rCurrent );
+                break;
+
+            case CallbackType::END_ELEMENT:
+                rEntity.endElement();
+                break;
+
+            case CallbackType::CHARACTERS:
+                rEntity.characters( rCurrent.msChars );
+                break;
+
+            case CallbackType::PROCESSING_INSTRUCTION:
+                // msNamespace / msElementName carry ( target, data ) for this event type
+                rEntity.processingInstruction(
+                    rCurrent.msNamespace, rCurrent.msElementName );
+                break;
+
+            case CallbackType::DONE:
+                return false;
+
+            case CallbackType::EXCEPTION:
+                rEntity.throwException( mxDocumentLocator, false );
+                [[fallthrough]]; // avoid unreachable code warning with some compilers
+            default:
+                assert(false);
+                return false;
+        }
+    }
+
+    return true;
+}
+
+/** Pushes a new entity for the given input source and makes it the current one.
+ *
+ *  @param rEntityData  parser data the new entity is constructed from
+ *  @param rSource      input source; its stream and encoding are given to the converter
+ *  @throws SAXException if rSource has no input stream
+ */
+void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
+        xml::sax::InputSource const& rSource)
+{
+    if (!rSource.aInputStream.is())
+        throw SAXException("No input source", Reference<XInterface>(), Any());
+
+    maEntities.emplace(rEntityData);
+    mpTop = &maEntities.top();
+
+    auto& rNewTop = *mpTop;
+    rNewTop.maStructSource = rSource;
+    rNewTop.maConverter.setInputStream(rNewTop.maStructSource.aInputStream);
+
+    // an empty encoding name is not passed to the converter
+    const OUString& rEncoding = rNewTop.maStructSource.sEncoding;
+    if (!rEncoding.isEmpty())
+        rNewTop.maConverter.setEncoding(OUStringToOString(rEncoding, RTL_TEXTENCODING_ASCII_US));
+}
+
+/// Removes the current entity and re-points mpTop at the new top (nullptr when none is left).
+void FastSaxParserImpl::popEntity()
+{
+    maEntities.pop();
+    if (maEntities.empty())
+        mpTop = nullptr;
+    else
+        mpTop = &maEntities.top();
+}
+
+// starts parsing with actual parser !
+/** Reads the current entity's input in chunks and feeds them to libxml2.
+ *
+ *  The push parser context is created from the first chunk (so libxml2 can
+ *  detect the encoding); later chunks go through xmlParseChunk(). When the
+ *  converter delivers no more data, the parser is terminated with a final
+ *  xmlParseChunk() call. After every step a parse error or an exception
+ *  saved by one of the callbacks is rethrown via Entity::throwException().
+ *  At the end a DONE event is requested and, in threaded mode, flushed to
+ *  the consumer.
+ *
+ *  @throws SAXException if the libxml2 parser context cannot be created
+ */
+void FastSaxParserImpl::parse()
+{
+    const int BUFFER_SIZE = 16 * 1024;
+    Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
+
+    Entity& rEntity = getEntity();
+
+    // set all necessary C-Callbacks
+    // The table is filled exactly once (thread-safe static initialisation)
+    // instead of being rewritten on every call: the object is shared by all
+    // parser instances, so repeated writes from concurrently running parsers
+    // would be unsynchronised.
+    static xmlSAXHandler callbacks = []
+    {
+        xmlSAXHandler aHandler = {};
+        aHandler.startElementNs = call_callbackStartElement;
+        aHandler.endElementNs = call_callbackEndElement;
+        aHandler.characters = call_callbackCharacters;
+        aHandler.processingInstruction = call_callbackProcessingInstruction;
+        aHandler.getEntity = call_callbackGetEntity;
+        aHandler.initialized = XML_SAX2_MAGIC;
+        return aHandler;
+    }();
+
+    int nRead = 0;
+    do
+    {
+        nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
+        if( nRead <= 0 )
+        {
+            // end of input: terminate the parser, if one was ever created
+            if( rEntity.mpParser != nullptr )
+            {
+                if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK )
+                    rEntity.throwException( mxDocumentLocator, true );
+                if (rEntity.hasException())
+                    rEntity.throwException(mxDocumentLocator, true);
+            }
+            break;
+        }
+
+        bool bContinue = true;
+        if( rEntity.mpParser == nullptr )
+        {
+            // create parser with proper encoding (needs the first chunk of data)
+            rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this,
+                reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr );
+            if( !rEntity.mpParser )
+                throw SAXException("Couldn't create parser", Reference< XInterface >(), Any() );
+
+            // Tell libxml2 parser to decode entities in attribute values.
+            // Also allow XML attribute values which are larger than 10MB, because this used to work
+            // with expat.
+            // coverity[unsafe_xml_parse_config] - entity support is required
+            xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE);
+        }
+        else
+        {
+            bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 )
+                    == XML_ERR_OK;
+        }
+
+        // callbacks used inside XML_Parse may have caught an exception
+        if (!bContinue)
+        {
+            rEntity.throwException( mxDocumentLocator, true );
+        }
+        if (rEntity.hasException())
+        {
+            rEntity.throwException( mxDocumentLocator, true );
+        }
+    } while( nRead > 0 );
+    rEntity.getEvent( CallbackType::DONE );
+    if( rEntity.mbEnableThreads )
+        produce( true );
+}
+
+// The C-Callbacks
+/** Start-element callback (libxml2 startElementNs layout).
+ *
+ *  Flushes pending character data, opens a new namespace scope, processes
+ *  the namespace declarations of the element, fills the attribute list of
+ *  the START_ELEMENT event and resolves the element token. The event is then
+ *  either queued via produce() (threaded mode) or delivered directly to the
+ *  entity. Any exception raised on the way is stored with
+ *  Entity::saveException() instead of being propagated through libxml2.
+ *
+ *  @param localName      element name without prefix
+ *  @param prefix         element prefix, or nullptr
+ *  @param URI            namespace URI of the element, or nullptr
+ *  @param numNamespaces  number of (prefix, URI) pairs in namespaces
+ *  @param namespaces     namespace declarations made on this element
+ *  @param numAttributes  number of 5-tuples in attributes
+ *  @param attributes     ( localname / prefix / nsURI / valueBegin / valueEnd ) per attribute
+ */
+void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
+    int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
+{
+    if (!pendingCharacters.empty())
+        sendPendingCharacters();
+    Entity& rEntity = getEntity();
+
+    // open a namespace scope; the outermost scope also defines the "xml" prefix
+    if( rEntity.maNamespaceCount.empty() )
+    {
+        rEntity.maNamespaceCount.push(0);
+        DefineNamespace( "xml", "http://www.w3.org/XML/1998/namespace");
+    }
+    else
+    {
+        rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() );
+    }
+
+    // create attribute map and process namespace instructions
+    Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT );
+    bool bIsAttributesEmpty = false;
+    if ( rEntity.mbEnableThreads )
+        bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty;
+
+    // an existing attribute list is cleared unless the list is flagged as already empty
+    if (rEvent.mxAttributes.is())
+    {
+        if( !bIsAttributesEmpty )
+            rEvent.mxAttributes->clear();
+    }
+    else
+        rEvent.mxAttributes.set(
+                new FastAttributeList( rEntity.mxTokenHandler.get() ) );
+
+    if( rEntity.mxNamespaceHandler.is() )
+    {
+        if (rEvent.mxDeclAttributes.is())
+        {
+            if( !bIsAttributesEmpty )
+                rEvent.mxDeclAttributes->clear();
+        }
+        else
+            rEvent.mxDeclAttributes.set(
+                new FastAttributeList( rEntity.mxTokenHandler.get() ) );
+    }
+
+    // start from the default namespace of the enclosing element, if any
+    OUString sNamespace;
+    sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
+    if (!rEntity.maNamespaceStack.empty())
+    {
+        sNamespace = rEntity.maNamespaceStack.top().msName;
+        nNamespaceToken = rEntity.maNamespaceStack.top().mnToken;
+    }
+
+    try
+    {
+        /*  #158414# Each element may define new namespaces, also for attributes.
+            First, process all namespaces, second, process the attributes after namespaces
+            have been initialized. */
+
+        // #158414# first: get namespaces
+        for (int i = 0; i < numNamespaces * 2; i += 2)
+        {
+            // namespaces[] is (prefix/URI)
+            if( namespaces[ i ] != nullptr )
+            {
+                OString aPrefix( XML_CAST( namespaces[ i ] ));
+                OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
+                NormalizeURI( namespaceURL );
+                DefineNamespace(aPrefix, namespaceURL);
+                if( rEntity.mxNamespaceHandler.is() )
+                    rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
+            }
+            else
+            {
+                // default namespace
+                sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
+                NormalizeURI( sNamespace );
+                nNamespaceToken = GetNamespaceToken( sNamespace );
+                if( rEntity.mxNamespaceHandler.is() )
+                    rEvent.mxDeclAttributes->addUnknown( "", OString( XML_CAST( namespaces[ i + 1 ] ) ) );
+            }
+        }
+
+        if ( rEntity.mxTokenHandler.is() )
+        {
+            // #158414# second: fill attribute list with other attributes
+            rEvent.mxAttributes->reserve( numAttributes );
+            for (int i = 0; i < numAttributes * 5; i += 5)
+            {
+                // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd )
+                if( attributes[ i + 1 ] != nullptr )
+                {
+                    sal_Int32 nAttributeToken = GetTokenWithPrefix( attributes[ i + 1 ], strlen( XML_CAST( attributes[ i + 1 ] )), attributes[ i ], strlen( XML_CAST( attributes[ i ] )));
+                    if( nAttributeToken != FastToken::DONTKNOW )
+                        rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] );
+                    else
+                        addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
+                }
+                else
+                {
+                    sal_Int32 nAttributeToken = GetToken( attributes[ i ], strlen( XML_CAST( attributes[ i ] )));
+                    if( nAttributeToken != FastToken::DONTKNOW )
+                        rEvent.mxAttributes->add( nAttributeToken, XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] );
+                    else
+                    {
+                        SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" <<
+                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
+                        rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
+                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
+                    }
+                }
+            }
+
+            // element token: explicit prefix first, then the default namespace, then the bare name
+            if( prefix != nullptr )
+                rEvent.mnElementToken = GetTokenWithPrefix( prefix, strlen( XML_CAST( prefix )), localName, strlen( XML_CAST( localName )));
+            else if( !sNamespace.isEmpty() )
+                rEvent.mnElementToken = GetTokenWithContextNamespace( nNamespaceToken, localName, strlen( XML_CAST( localName )));
+            else
+                rEvent.mnElementToken = GetToken( localName, strlen( XML_CAST( localName )));
+        }
+        else
+        {
+            // without a token handler every attribute is stored as unknown
+            for (int i = 0; i < numAttributes * 5; i += 5)
+            {
+                if( attributes[ i + 1 ] != nullptr )
+                    addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes);
+                else
+                    rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ),
+                            OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
+            }
+
+            rEvent.mnElementToken = FastToken::DONTKNOW;
+        }
+
+        if( rEvent.mnElementToken == FastToken::DONTKNOW )
+        {
+            OUString aElementPrefix;
+            if( prefix != nullptr )
+            {
+                // URI is nullptr when the prefix has no namespace declaration.
+                // Never hand a null pointer to strlen(): either ignore the
+                // missing declaration (if asked to) or report it like
+                // GetNamespaceURL() does.
+                if ( URI != nullptr )
+                    sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 );
+                else if ( m_bIgnoreMissingNSDecl )
+                    sNamespace.clear();
+                else
+                    throw SAXException("No namespace defined for " + OUString::fromUtf8( XML_CAST( prefix ) ),
+                                       Reference< XInterface >(), Any());
+                nNamespaceToken = GetNamespaceToken( sNamespace );
+                aElementPrefix = OUString( XML_CAST( prefix ), strlen( XML_CAST( prefix )), RTL_TEXTENCODING_UTF8 );
+            }
+            OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 );
+            rEvent.msNamespace = sNamespace;
+            if( aElementPrefix.isEmpty() )
+                rEvent.msElementName = std::move(aElementLocalName);
+            else
+                rEvent.msElementName = aElementPrefix + ":" + aElementLocalName;
+        }
+        else // token is always preferred.
+            rEvent.msElementName.clear();
+
+        rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) );
+        if (rEntity.mbEnableThreads)
+            produce();
+        else
+        {
+            SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName);
+            rEntity.startElement( &rEvent );
+        }
+    }
+    catch (...)
+    {
+        // exceptions must not travel through the C parser; keep them for later
+        rEntity.saveException( ::cppu::getCaughtException() );
+    }
+}
+
+/** Adds a prefixed attribute without a known token to xAttributes.
+ *
+ *  @param attributes   attribute array in libxml2 layout
+ *                      ( localname / prefix / nsURI / valueBegin / valueEnd )
+ *  @param i            index of the attribute's localname entry in attributes
+ *  @param xAttributes  list receiving the attribute as ( namespace URI, qualified name, value )
+ *  @throws SAXException if the attribute has no namespace URI and missing
+ *          namespace declarations are not being ignored
+ */
+void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes)
+{
+    const xmlChar* const pLocalName = attributes[ i ];
+    const xmlChar* const pPrefix = attributes[ i + 1 ];
+    const xmlChar* const pURI = attributes[ i + 2 ];
+
+    // A null URI means the prefix has no namespace declaration. It must never
+    // reach strlen(): leave the URI empty when such declarations are ignored,
+    // otherwise report the problem like GetNamespaceURL() does.
+    OUString aNamespaceURI;
+    if ( pURI != nullptr )
+        aNamespaceURI = OUString( XML_CAST( pURI ), strlen( XML_CAST( pURI )), RTL_TEXTENCODING_UTF8 );
+    else if ( !m_bIgnoreMissingNSDecl )
+        throw SAXException("No namespace defined for " + OUString::fromUtf8( XML_CAST( pPrefix ) ),
+                           Reference< XInterface >(), Any());
+
+    const OString aPrefix( XML_CAST( pPrefix ) );
+    const OString aLocalName( XML_CAST( pLocalName ) );
+    OString aQualifiedName = (aPrefix.isEmpty())? aLocalName : aPrefix + ":" + aLocalName;
+    xAttributes->addUnknown( aNamespaceURI, aQualifiedName,
+        OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] ));
+    // ignore an element that otherwise generates a lot of noise in the logs
+    SAL_WARN_IF(aQualifiedName != "x14ac:dyDescent", "xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI);
+}
+
+/** End-element callback.
+ *
+ *  Flushes pending character data, closes the namespace scope opened by the
+ *  matching start element (warning if a stack is unexpectedly empty) and
+ *  emits an END_ELEMENT event - queued in threaded mode, delivered directly
+ *  otherwise.
+ */
+void FastSaxParserImpl::callbackEndElement()
+{
+    if (!pendingCharacters.empty())
+        sendPendingCharacters();
+
+    Entity& rEntity = getEntity();
+
+    if( rEntity.maNamespaceCount.empty() )
+    {
+        SAL_WARN("sax", "Empty NamespaceCount");
+    }
+    else
+    {
+        rEntity.maNamespaceCount.pop();
+    }
+
+    if( rEntity.maNamespaceStack.empty() )
+    {
+        SAL_WARN("sax", "Empty NamespaceStack");
+    }
+    else
+    {
+        rEntity.maNamespaceStack.pop();
+    }
+
+    rEntity.getEvent( CallbackType::END_ELEMENT );
+    if (!rEntity.mbEnableThreads)
+        rEntity.endElement();
+    else
+        produce();
+}
+
+/** Character-data callback: appends the bytes to pendingCharacters.
+ *
+ *  @param s     start of the character data
+ *  @param nLen  number of bytes at s; calls with a null pointer or a
+ *               non-positive length are ignored
+ */
+void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
+{
+    // SAX interface allows that the characters callback splits content of one XML node
+    // (e.g. because there's an entity that needs decoding), however for consumers it's
+    // simpler FastSaxParser's character callback provides the whole string at once,
+    // so merge data from possible multiple calls and send them at once (before the element
+    // ends or another one starts).
+    //
+    // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
+    // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
+    // often in writer documents.
+
+    // nothing to append; also keeps memcpy() away from null pointers and negative sizes
+    if( s == nullptr || nLen <= 0 )
+        return;
+
+    // keep the old size in the container's own size type - an int would truncate large buffers
+    const size_t nOriginalLen = pendingCharacters.size();
+    const size_t nAppendLen = static_cast<size_t>(nLen);
+    pendingCharacters.resize(nOriginalLen + nAppendLen);
+    memcpy(pendingCharacters.data() + nOriginalLen, s, nAppendLen);
+}
+
+/** Converts the collected character bytes (UTF-8) into one string and emits it.
+ *
+ *  In threaded mode the string is put into a CHARACTERS event and handed to
+ *  produce(); otherwise it is delivered to the entity directly. The byte
+ *  buffer is emptied afterwards.
+ */
+void FastSaxParserImpl::sendPendingCharacters()
+{
+    Entity& rEntity = getEntity();
+    OUString sText( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
+
+    if (!rEntity.mbEnableThreads)
+    {
+        rEntity.characters( sText );
+    }
+    else
+    {
+        Event& rCharEvent = rEntity.getEvent( CallbackType::CHARACTERS );
+        rCharEvent.msChars = std::move(sText);
+        produce();
+    }
+
+    pendingCharacters.clear();
+}
+
+/** Processing-instruction callback.
+ *
+ *  @param target  PI target
+ *  @param data    PI data, may be nullptr (treated as empty)
+ *
+ *  The event reuses its namespace / element-name strings to carry target and
+ *  data, in that order.
+ */
+void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
+{
+    if (!pendingCharacters.empty())
+        sendPendingCharacters();
+
+    Entity& rEntity = getEntity();
+    Event& rPIEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION );
+
+    // This event is very rare, so no need to waste extra space for this
+    // Using namespace and element strings to be target and data in that order.
+    rPIEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 );
+    if ( data == nullptr )
+        rPIEvent.msElementName.clear();
+    else
+        rPIEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 );
+
+    if (!rEntity.mbEnableThreads)
+        rEntity.processingInstruction( rPIEvent.msNamespace, rPIEvent.msElementName );
+    else
+        produce();
+}
+
+/** Entity lookup callback.
+ *
+ *  Resolution order:
+ *   1. a custom replacement registered through setCustomEntityNames();
+ *   2. a numeric name of the form "#xHHHH" / "#XHHHH" (hexadecimal) or
+ *      "#NNNN" (decimal), turned into the corresponding code point;
+ *   3. whatever xmlGetPredefinedEntity() returns for the name.
+ *
+ *  Entities created here are recorded in m_TemporalEntities.
+ *
+ *  @param name  entity name as passed by libxml2, may be nullptr
+ *  @return the entity, or the result of xmlGetPredefinedEntity() when the
+ *          name is not handled here
+ */
+xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name )
+{
+    if( !name )
+        return xmlGetPredefinedEntity(name);
+    const char* dname = XML_CAST(name);
+    const size_t lname = strlen(dname);
+    if( lname == 0 )
+        return xmlGetPredefinedEntity(name);
+
+    // creates an internal general entity with the given content and records it
+    const auto makeEntity = [this, name]( const OUString& rContent ) -> xmlEntityPtr
+    {
+        xmlEntityPtr entpt = xmlNewEntity(
+            nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr,
+            BAD_CAST(OUStringToOString(rContent, RTL_TEXTENCODING_UTF8).getStr()));
+        m_TemporalEntities.push_back(entpt);
+        return entpt;
+    };
+
+    if (!m_Replacements.empty())
+    {
+        // m_Replacements is sorted by setCustomEntityNames()
+        auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname);
+        if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0)
+            return makeEntity(it->replacement);
+    }
+
+    if( lname < 2 )
+        return xmlGetPredefinedEntity(name);
+
+    if ( dname[0] == '#' )
+    {
+        const bool bHex = ( dname[1] == 'x' || dname[1] == 'X' );
+        if( bHex && lname < 3 )
+            return xmlGetPredefinedEntity(name);
+
+        // hexadecimal digits start after "#x", decimal digits right after "#"
+        // (the decimal case used to skip the first digit)
+        const unsigned long nValue = bHex ? strtoul( dname + 2, nullptr, 16 )
+                                          : strtoul( dname + 1, nullptr, 10 );
+
+        // 0 means "not a number"; values above 0x10FFFF are not code points.
+        // Checked before narrowing so that oversized values are not truncated
+        // into an unrelated character.
+        if( nValue == 0 || nValue > 0x10FFFF )
+            return xmlGetPredefinedEntity(name);
+
+        const sal_uInt32 cval = static_cast<sal_uInt32>( nValue );
+        return makeEntity( OUString( &cval, 1 ) );
+    }
+
+    return xmlGetPredefinedEntity(name);
+}
+
+/// Creates the parser together with its implementation object.
+FastSaxParser::FastSaxParser()
+    : mpImpl(new FastSaxParserImpl)
+{
+}
+
+/// Defined out of line, next to the implementation class.
+FastSaxParser::~FastSaxParser() = default;
+
+/** Applies an initialization option.
+ *
+ *  Only the first argument is looked at; it must be one of the strings
+ *  "IgnoreMissingNSDecl", "DoSmeplease" or "DisableThreadedParser".
+ *  An empty argument list is accepted and changes nothing.
+ *
+ *  @throws IllegalArgumentException if the first argument is not a string or
+ *          not one of the known options
+ */
+void SAL_CALL
+FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments)
+{
+    if (!rArguments.hasElements())
+        return;
+
+    OUString sOption;
+    if ( !(rArguments[0] >>= sOption) )
+        throw IllegalArgumentException();
+
+    if ( sOption == "IgnoreMissingNSDecl" )
+    {
+        mpImpl->m_bIgnoreMissingNSDecl = true;
+    }
+    else if ( sOption == "DoSmeplease" )
+    {
+        //just ignore as this is already immune to billion laughs
+    }
+    else if ( sOption == "DisableThreadedParser" )
+    {
+        mpImpl->m_bDisableThreadedParser = true;
+    }
+    else
+    {
+        throw IllegalArgumentException();
+    }
+}
+
+/// Forwards to FastSaxParserImpl::parseStream().
+void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource )
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.parseStream(aInputSource);
+}
+
+/// Forwards to FastSaxParserImpl::setFastDocumentHandler().
+void FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler )
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.setFastDocumentHandler(Handler);
+}
+
+/// Forwards to FastSaxParserImpl::setTokenHandler().
+void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler )
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.setTokenHandler(Handler);
+}
+
+/// Forwards to FastSaxParserImpl::registerNamespace().
+void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.registerNamespace(NamespaceURL, NamespaceToken);
+}
+
+/// Forwards to FastSaxParserImpl::getNamespaceURL() and returns a copy of the URL.
+OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix )
+{
+    OUString const & rURL = mpImpl->getNamespaceURL(rPrefix);
+    return rURL;
+}
+
+/// Forwards to FastSaxParserImpl::setErrorHandler().
+void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler )
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.setErrorHandler(Handler);
+}
+
+/// Accepts an entity resolver but does nothing with it.
+void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& /*not used*/ )
+{
+    // not implemented
+}
+
+/// Accepts a locale but does nothing with it.
+void FastSaxParser::setLocale( const lang::Locale& /*not used*/ )
+{
+    // not implemented
+}
+
+/// Forwards to FastSaxParserImpl::setNamespaceHandler().
+void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler)
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.setNamespaceHandler(Handler);
+}
+
+/// Returns the implementation name of this component.
+OUString FastSaxParser::getImplementationName()
+{
+    OUString sImplName( "com.sun.star.comp.extensions.xml.sax.FastParser" );
+    return sImplName;
+}
+
+/// Forwards to FastSaxParserImpl::setCustomEntityNames().
+void FastSaxParser::setCustomEntityNames(
+    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
+{
+    FastSaxParserImpl& rImpl = *mpImpl;
+    rImpl.setCustomEntityNames(replacements);
+}
+
+/// Answers via cppu::supportsService() for this object and the given service name.
+sal_Bool FastSaxParser::supportsService( const OUString& ServiceName )
+{
+    const sal_Bool bSupported = cppu::supportsService(this, ServiceName);
+    return bSupported;
+}
+
+/// Returns the single service name this component supports.
+uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames()
+{
+    uno::Sequence<OUString> aServiceNames{ "com.sun.star.xml.sax.FastParser" };
+    return aServiceNames;
+}
+
+} // namespace sax_fastparser
+
+/// Exported factory: returns a new, acquired FastSaxParser; both arguments are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation(
+    css::uno::XComponentContext * /*context*/,
+    css::uno::Sequence<css::uno::Any> const & /*arguments*/)
+{
+    FastSaxParser* const pNewParser = new FastSaxParser;
+    return cppu::acquire(pNewParser);
+}
+
+// ----------------------------------------------------------
+// copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases
+// for various dodgy namespace decls in the wild.
+
+static bool NormalizeW3URI( OUString& rName );
+static bool NormalizeOasisURN( OUString& rName );
+
+/** Rewrites rName in place if it is one of the namespace names the two
+ *  helpers know how to normalize; otherwise rName is left untouched.
+ */
+static void NormalizeURI( OUString& rName )
+{
+    // try OASIS + W3 URI normalization; the W3 check only runs if the OASIS one did not match
+    if( !NormalizeOasisURN( rName ) )
+        NormalizeW3URI( rName );
+}
+
+constexpr OUStringLiteral XML_URI_W3_PREFIX(u"http://www.w3.org/"); // prefix NormalizeW3URI() requires
+constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX(u"/xforms"); // suffix NormalizeW3URI() requires
+constexpr OUStringLiteral XML_N_XFORMS_1_0(u"http://www.w3.org/2002/xforms"); // result of NormalizeW3URI()
+constexpr OUStringLiteral XML_N_SVG(u"http://www.w3.org/2000/svg"); // replaced by XML_N_SVG_COMPAT
+constexpr OUStringLiteral XML_N_SVG_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0");
+constexpr OUStringLiteral XML_N_FO(u"http://www.w3.org/1999/XSL/Format"); // replaced by XML_N_FO_COMPAT
+constexpr OUStringLiteral XML_N_FO_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0");
+constexpr OUStringLiteral XML_N_SMIL(u"http://www.w3.org/2001/SMIL20/"); // replaced by XML_N_SMIL_COMPAT
+constexpr OUStringLiteral XML_N_SMIL_OLD(u"http://www.w3.org/2001/SMIL20"); // same, without the trailing slash
+constexpr OUStringLiteral XML_N_SMIL_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0");
+constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC(u"urn:oasis:names:tc"); // required start of a URN handled by NormalizeOasisURN()
+constexpr OUStringLiteral XML_XMLNS(u"xmlns"); // fixed segment following the TC id
+constexpr OUStringLiteral XML_OPENDOCUMENT(u"opendocument"); // TC id written by NormalizeOasisURN()
+constexpr OUStringLiteral XML_1_0(u"1.0"); // version written by NormalizeOasisURN()
+
+/** Maps any "http://www.w3.org/.../xforms" name to the XForms 1.0 namespace.
+ *
+ *  @param rName  namespace name; overwritten with XML_N_XFORMS_1_0 on a match
+ *  @return true if rName was replaced, false if it was left unchanged
+ */
+static bool NormalizeW3URI( OUString& rName )
+{
+    // check if URI matches:
+    // http://www.w3.org/[0-9]*/[:letter:]*
+    //                   (year)/(WG name)
+    // For the following WG/standards names:
+    // - xforms
+
+    const OUString& sURIPrefix = XML_URI_W3_PREFIX;
+    if( !rName.startsWith( sURIPrefix ) )
+        return false;
+
+    const OUString& sURISuffix = XML_URI_XFORMS_SUFFIX ;
+    if( !rName.endsWith( sURISuffix ) )
+        return false;
+
+    // found W3 prefix, and xforms suffix
+    rName = XML_N_XFORMS_1_0;
+    return true;
+}
+
+static bool NormalizeOasisURN( OUString& rName )
+{
+ // Rewrites rName in place when it is a known old or alternative namespace name; returns true if it did.
+ // #i38644#: we exported the wrong namespace for smil, so we correct this here on load
+ // for older documents; the W3C svg and XSL-FO names are mapped to their *-compatible URNs the same way.
+ if( rName == XML_N_SVG )
+ {
+ rName = XML_N_SVG_COMPAT;
+ return true;
+ }
+ else if( rName == XML_N_FO )
+ {
+ rName = XML_N_FO_COMPAT;
+ return true;
+ }
+ else if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD )
+ {
+ rName = XML_N_SMIL_COMPAT;
+ return true;
+ }
+
+
+ // Check if URN matches
+ // :urn:oasis:names:tc:[^:]*:xmlns:[^:]*:1.[^:]*
+ // |---| |---| |-----|
+ // TC-Id Sub-Id Version
+
+ sal_Int32 nNameLen = rName.getLength();
+ // :urn:oasis:names:tc.*
+ const OUString& rOasisURN = XML_URN_OASIS_NAMES_TC;
+ if( !rName.startsWith( rOasisURN ) )
+ return false;
+
+ // :urn:oasis:names:tc:.*
+ sal_Int32 nPos = rOasisURN.getLength();
+ if( nPos >= nNameLen || rName[nPos] != ':' )
+ return false;
+
+ // :urn:oasis:names:tc:[^:]:.*
+ sal_Int32 nTCIdStart = nPos+1; // first character of the TC id
+ sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart ); // position of the ':' that ends the TC id
+ if( -1 == nTCIdEnd )
+ return false;
+
+ // :urn:oasis:names:tc:[^:]:xmlns.*
+ nPos = nTCIdEnd + 1;
+ std::u16string_view sTmp( rName.subView( nPos ) );
+ const OUString& rXMLNS = XML_XMLNS;
+ if( !o3tl::starts_with(sTmp, rXMLNS ) )
+ return false;
+
+ // :urn:oasis:names:tc:[^:]:xmlns:.*
+ nPos += rXMLNS.getLength();
+ if( nPos >= nNameLen || rName[nPos] != ':' )
+ return false;
+
+ // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:.*
+ nPos = rName.indexOf( ':', nPos+1 ); // position of the ':' that ends the sub id
+ if( -1 == nPos )
+ return false;
+
+ // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:[^:][^:][^:][^:]*
+ sal_Int32 nVersionStart = nPos+1;
+ if( nVersionStart+2 >= nNameLen || // the version needs at least three characters
+ -1 != rName.indexOf( ':', nVersionStart ) ) // and must not contain another ':'
+ return false;
+
+ // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:1\.[^:][^:]*
+ if( rName[nVersionStart] != '1' || rName[nVersionStart+1] != '.' )
+ return false;
+
+ // replace [tcid] with current TCID and version with current version.
+
+ rName = rName.subView( 0, nTCIdStart ) + // everything up to and including the ':' before the TC id
+ XML_OPENDOCUMENT +
+ rName.subView( nTCIdEnd, nVersionStart-nTCIdEnd ) + // ":xmlns:<sub id>:" copied unchanged
+ XML_1_0;
+
+ return true;
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/fastparser/legacyfastparser.cxx b/sax/source/fastparser/legacyfastparser.cxx
new file mode 100644
index 000000000..e4c425bd0
--- /dev/null
+++ b/sax/source/fastparser/legacyfastparser.cxx
@@ -0,0 +1,375 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/xml/sax/XParser.hpp>
+#include <com/sun/star/xml/sax/FastParser.hpp>
+#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/beans/Pair.hpp>
+#include <comphelper/attributelist.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <comphelper/processfactory.hxx>
+#include <rtl/ref.hxx>
+#include <memory>
+#include <utility>
+#include <vector>
+
+using namespace ::cppu;
+using namespace css;
+using namespace uno;
+using namespace lang;
+using namespace xml::sax;
+using namespace io;
+
+namespace {
+
+/** Collects the namespace declarations reported through XFastNamespaceHandler
+    and hands them out as "xmlns"/"xmlns:prefix" attributes on request. */
+class NamespaceHandler : public WeakImplHelper< XFastNamespaceHandler >
+{
+private:
+    /// One pending prefix / namespace-URI declaration.
+    struct NamespaceDefine
+    {
+        OUString m_aPrefix;
+        OUString m_aNamespaceURI;
+
+        NamespaceDefine( OUString aPrefix, OUString aNamespaceURI ) : m_aPrefix(std::move( aPrefix )), m_aNamespaceURI(std::move( aNamespaceURI )) {}
+    };
+    // Held by value: the entries are small, never shared and never polymorphic,
+    // so a separate heap allocation per declaration buys nothing.
+    std::vector< NamespaceDefine > m_aNamespaceDefines;
+
+public:
+    NamespaceHandler();
+    /** Appends every pending declaration to rAttrList as a CDATA attribute and
+        then forgets them, so each declaration is emitted only once. */
+    void addNSDeclAttributes( rtl::Reference < comphelper::AttributeList > const & rAttrList );
+
+    //XFastNamespaceHandler
+    virtual void SAL_CALL registerNamespace( const OUString& rNamespacePrefix, const OUString& rNamespaceURI ) override;
+    virtual OUString SAL_CALL getNamespaceURI( const OUString& rNamespacePrefix ) override;
+};
+
+NamespaceHandler::NamespaceHandler()
+{
+}
+
+void NamespaceHandler::addNSDeclAttributes( rtl::Reference < comphelper::AttributeList > const & rAttrList )
+{
+    for (const NamespaceDefine& rDefine : m_aNamespaceDefines)
+    {
+        // An empty prefix denotes the default namespace declaration.
+        OUString sDecl;
+        if ( rDefine.m_aPrefix.isEmpty() )
+            sDecl = "xmlns";
+        else
+            sDecl = "xmlns:" + rDefine.m_aPrefix;
+        rAttrList->AddAttribute( sDecl, "CDATA", rDefine.m_aNamespaceURI );
+    }
+    m_aNamespaceDefines.clear();
+}
+
+/** Remembers a declaration until the next addNSDeclAttributes() call. */
+void NamespaceHandler::registerNamespace( const OUString& rNamespacePrefix, const OUString& rNamespaceURI )
+{
+    m_aNamespaceDefines.emplace_back( rNamespacePrefix, rNamespaceURI );
+}
+
+/** Lookup is not implemented here: always returns an empty string. */
+OUString NamespaceHandler::getNamespaceURI( const OUString&/* rNamespacePrefix */ )
+{
+    return OUString();
+}
+
+/** Implements the legacy XParser interface on top of a FastParser instance.
+    Parsing is delegated to m_xParser; its fast callbacks are converted back
+    into XDocumentHandler calls by a CallbackDocumentHandler created in
+    parseStream(). */
+class SaxLegacyFastParser : public WeakImplHelper< XInitialization, XServiceInfo, XParser >
+{
+private:
+ // Declared before m_xParser: the constructor initializes it first and then
+ // hands it to the parser.
+ rtl::Reference< NamespaceHandler > m_aNamespaceHandler;
+public:
+ SaxLegacyFastParser();
+
+// css::lang::XInitialization:
+ virtual void SAL_CALL initialize(css::uno::Sequence<css::uno::Any> const& rArguments) override;
+
+// The SAX-Parser-Interface
+ virtual void SAL_CALL parseStream( const InputSource& structSource) override;
+ virtual void SAL_CALL setDocumentHandler(const Reference< XDocumentHandler > & xHandler) override;
+ virtual void SAL_CALL setErrorHandler(const Reference< XErrorHandler > & xHandler) override;
+ virtual void SAL_CALL setDTDHandler(const Reference < XDTDHandler > & xHandler) override;
+ virtual void SAL_CALL setEntityResolver(const Reference< XEntityResolver >& xResolver) override;
+ virtual void SAL_CALL setLocale( const Locale &locale ) override;
+
+// XServiceInfo
+ OUString SAL_CALL getImplementationName() override;
+ Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
+ sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
+
+private:
+ // The wrapped fast parser that does the actual work.
+ Reference< XFastParser > m_xParser;
+ // Legacy handler that receives the converted events; may be unset.
+ Reference< XDocumentHandler > m_xDocumentHandler;
+ // Token handler supplied through initialize(); stays empty otherwise.
+ Reference< XFastTokenHandler > m_xTokenHandler;
+
+};
+
+
+/** Fast document handler that translates every fast-parser callback into the
+    corresponding XDocumentHandler call, rebuilding qualified names from
+    tokens with the help of the token handler. */
+class CallbackDocumentHandler : public WeakImplHelper< XFastDocumentHandler >
+{
+private:
+ // Receiver of the translated events; every forwarding method checks is().
+ Reference< XDocumentHandler > m_xDocumentHandler;
+ // Used to turn token ids back into UTF-8 identifiers.
+ Reference< XFastTokenHandler > m_xTokenHandler;
+ // Source of the pending xmlns declarations added to each start element.
+ rtl::Reference< NamespaceHandler > m_aNamespaceHandler;
+ // Returns the identifier for the upper 16 bits of nToken, or "" if they are zero.
+ OUString getNamespacePrefixFromToken( sal_Int32 nToken );
+ // Returns the identifier for the lower 16 bits of nToken.
+ OUString getNameFromToken( sal_Int32 nToken );
+
+ static constexpr OUStringLiteral aDefaultNamespace = u"";
+ static constexpr OUStringLiteral aNamespaceSeparator = u":";
+
+public:
+ CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler,
+ rtl::Reference< NamespaceHandler > const & rNamespaceHandler,
+ Reference< XFastTokenHandler > const & xTokenHandler);
+
+ // XFastDocumentHandler
+ virtual void SAL_CALL startDocument() override;
+ virtual void SAL_CALL endDocument() override;
+ virtual void SAL_CALL processingInstruction( const OUString& rTarget, const OUString& rData ) override;
+ virtual void SAL_CALL setDocumentLocator( const Reference< XLocator >& xLocator ) override;
+
+ // XFastContextHandler
+ virtual void SAL_CALL startFastElement( sal_Int32 nElement, const Reference< XFastAttributeList >& Attribs ) override;
+ virtual void SAL_CALL startUnknownElement( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs ) override;
+ virtual void SAL_CALL endFastElement( sal_Int32 Element ) override;
+ virtual void SAL_CALL endUnknownElement( const OUString& Namespace, const OUString& Name ) override;
+ virtual Reference< XFastContextHandler > SAL_CALL createFastChildContext( sal_Int32 nElement, const Reference< XFastAttributeList >& Attribs ) override;
+ virtual Reference< XFastContextHandler > SAL_CALL createUnknownChildContext( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs ) override;
+ virtual void SAL_CALL characters( const OUString& aChars ) override;
+
+};
+
+/** Resolves the upper 16 bits of nToken to its UTF-8 identifier; returns an
+    empty string when those bits are all zero. */
+OUString CallbackDocumentHandler::getNamespacePrefixFromToken( sal_Int32 nToken )
+{
+    const auto nNamespacePart = nToken & 0xffff0000;
+    if ( nNamespacePart == 0 )
+        return OUString();
+
+    Sequence< sal_Int8 > aUtf8 = m_xTokenHandler->getUTF8Identifier( nNamespacePart );
+    const char* pBytes = reinterpret_cast< const char* >( aUtf8.getConstArray() );
+    return OUString( pBytes, aUtf8.getLength(), RTL_TEXTENCODING_UTF8 );
+}
+
+/** Resolves the lower 16 bits of nToken to its UTF-8 identifier. */
+OUString CallbackDocumentHandler::getNameFromToken( sal_Int32 nToken )
+{
+    Sequence< sal_Int8 > aUtf8 = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff );
+    const char* pBytes = reinterpret_cast< const char* >( aUtf8.getConstArray() );
+    return OUString( pBytes, aUtf8.getLength(), RTL_TEXTENCODING_UTF8 );
+}
+
+/** Stores the legacy handler, the namespace collector and the token handler
+    that the callbacks use later. */
+CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler,
+                                                  rtl::Reference< NamespaceHandler > const & rNamespaceHandler,
+                                                  Reference< XFastTokenHandler > const & xTokenHandler)
+    : m_xDocumentHandler( xDocumentHandler )
+    , m_xTokenHandler( xTokenHandler )
+    , m_aNamespaceHandler( rNamespaceHandler )
+{
+}
+
+/** Forwards the start-of-document event if a legacy handler is set. */
+void SAL_CALL CallbackDocumentHandler::startDocument()
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+    m_xDocumentHandler->startDocument();
+}
+
+/** Forwards the end-of-document event if a legacy handler is set. */
+void SAL_CALL CallbackDocumentHandler::endDocument()
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+    m_xDocumentHandler->endDocument();
+}
+
+/** Forwards a processing instruction unchanged if a legacy handler is set. */
+void SAL_CALL CallbackDocumentHandler::processingInstruction( const OUString& rTarget, const OUString& rData )
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+    m_xDocumentHandler->processingInstruction( rTarget, rData );
+}
+
+/** Passes the locator on to the legacy handler if one is set. */
+void SAL_CALL CallbackDocumentHandler::setDocumentLocator( const Reference< XLocator >& xLocator )
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+    m_xDocumentHandler->setDocumentLocator( xLocator );
+}
+
+/** Rebuilds the qualified element name ("prefix:local" or just "local") from
+    the token and continues as for an unknown element. */
+void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32 nElement , const Reference< XFastAttributeList >& Attribs )
+{
+    const OUString sPrefix = getNamespacePrefixFromToken( nElement );
+    const OUString sLocalName = getNameFromToken( nElement );
+    if ( sPrefix.isEmpty() )
+    {
+        startUnknownElement( aDefaultNamespace, sLocalName, Attribs );
+        return;
+    }
+    const OUString sQualifiedName = sPrefix + aNamespaceSeparator + sLocalName;
+    startUnknownElement( aDefaultNamespace, sQualifiedName, Attribs );
+}
+
+/** Builds a legacy attribute list for the element and calls startElement().
+    The list holds, in this order: the pending namespace declarations, the
+    tokenized attributes (names rebuilt from their tokens) and the unknown
+    attributes (names taken as they are). All are added with type "CDATA". */
+void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& /*Namespace*/, const OUString& Name, const Reference< XFastAttributeList >& Attribs )
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+
+    rtl::Reference < comphelper::AttributeList > xLegacyAttribs = new comphelper::AttributeList;
+    m_aNamespaceHandler->addNSDeclAttributes( xLegacyAttribs );
+
+    const Sequence< xml::FastAttribute > aTokenized = Attribs->getFastAttributes();
+    for (const xml::FastAttribute& rFast : aTokenized)
+    {
+        const OUString sPrefix = getNamespacePrefixFromToken( rFast.Token );
+        const OUString sLocalName = getNameFromToken( rFast.Token );
+        OUString sQualifiedName;
+        if ( sPrefix.isEmpty() )
+            sQualifiedName = sLocalName;
+        else
+            sQualifiedName = sPrefix + aNamespaceSeparator + sLocalName;
+
+        xLegacyAttribs->AddAttribute( sQualifiedName, "CDATA", rFast.Value );
+    }
+
+    const Sequence< xml::Attribute > aUntokenized = Attribs->getUnknownAttributes();
+    for (const xml::Attribute& rPlain : aUntokenized)
+        xLegacyAttribs->AddAttribute( rPlain.Name, "CDATA", rPlain.Value );
+
+    m_xDocumentHandler->startElement( Name, xLegacyAttribs );
+}
+
+/** Rebuilds the qualified element name from the token and continues as for
+    an unknown element. */
+void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32 nElement )
+{
+    const OUString sPrefix = getNamespacePrefixFromToken( nElement );
+    const OUString sLocalName = getNameFromToken( nElement );
+    if ( sPrefix.isEmpty() )
+    {
+        endUnknownElement( aDefaultNamespace, sLocalName );
+        return;
+    }
+    const OUString sQualifiedName = sPrefix + aNamespaceSeparator + sLocalName;
+    endUnknownElement( aDefaultNamespace, sQualifiedName );
+}
+
+
+/** Forwards the end of an element by name; the namespace argument is unused. */
+void SAL_CALL CallbackDocumentHandler::endUnknownElement( const OUString& /*Namespace*/, const OUString& Name )
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+    m_xDocumentHandler->endElement( Name );
+}
+
+// This object serves as the context handler for every child element, so the
+// arguments are ignored and the handler returns itself.
+Reference< XFastContextHandler > SAL_CALL CallbackDocumentHandler::createFastChildContext( sal_Int32/* nElement */, const Reference< XFastAttributeList >&/* Attribs */ )
+{
+ return this;
+}
+
+
+// Same as createFastChildContext(): unknown children are handled by this
+// object as well.
+Reference< XFastContextHandler > SAL_CALL CallbackDocumentHandler::createUnknownChildContext( const OUString&/* Namespace */, const OUString&/* Name */, const Reference< XFastAttributeList >&/* Attribs */ )
+{
+ return this;
+}
+
+/** Forwards character data unchanged if a legacy handler is set. */
+void SAL_CALL CallbackDocumentHandler::characters( const OUString& aChars )
+{
+    if ( !m_xDocumentHandler.is() )
+        return;
+    m_xDocumentHandler->characters( aChars );
+}
+
+// Creates the namespace collector and the wrapped FastParser (from the process
+// component context), then registers the collector with the parser.
+SaxLegacyFastParser::SaxLegacyFastParser( ) : m_aNamespaceHandler( new NamespaceHandler ),
+ m_xParser(FastParser::create(::comphelper::getProcessComponentContext() ))
+{
+ m_xParser->setNamespaceHandler( m_aNamespaceHandler );
+}
+
+/** Interprets the initialization arguments by looking at the first one:
+    - a non-empty XFastTokenHandler reference is stored as the token handler;
+    - the string "registerNamespaces" means every following argument is a
+      Pair< OUString, sal_Int32 > that is registered with the wrapped parser;
+    - anything else is passed on unchanged to the wrapped parser's own
+      XInitialization (queried with UNO_QUERY_THROW).
+    An empty argument sequence is ignored. */
+void SAL_CALL SaxLegacyFastParser::initialize(Sequence< Any > const& rArguments )
+{
+    if (!rArguments.hasElements())
+        return;
+
+    Reference< XFastTokenHandler > xTokenHandler;
+    OUString str;
+    if ( ( rArguments[0] >>= xTokenHandler ) && xTokenHandler.is() )
+    {
+        m_xTokenHandler.set( xTokenHandler );
+    }
+    else if ( ( rArguments[0] >>= str ) && "registerNamespaces" == str )
+    {
+        for (sal_Int32 i = 1; i < rArguments.getLength(); i++ )
+        {
+            // A fresh pair per argument: the extraction result is not checked,
+            // so a pair kept across iterations would make a malformed argument
+            // silently register the previous argument's values again. With a
+            // per-iteration pair it is registered as a default-constructed one.
+            // NOTE(review): how the parser treats such a pair is not visible
+            // here - confirm it is rejected.
+            css::beans::Pair< OUString, sal_Int32 > aPair;
+            rArguments[i] >>= aPair;
+            m_xParser->registerNamespace( aPair.First, aPair.Second );
+        }
+    }
+    else
+    {
+        uno::Reference<lang::XInitialization> const xInit(m_xParser,
+                            uno::UNO_QUERY_THROW);
+        xInit->initialize( rArguments );
+    }
+}
+
+/** Installs a new CallbackDocumentHandler and the current token handler on
+    the wrapped parser, then lets it parse the given source. */
+void SaxLegacyFastParser::parseStream( const InputSource& structSource )
+{
+    const Reference< XFastDocumentHandler > xBridge(
+        new CallbackDocumentHandler( m_xDocumentHandler, m_aNamespaceHandler, m_xTokenHandler ) );
+    m_xParser->setFastDocumentHandler( xBridge );
+    m_xParser->setTokenHandler( m_xTokenHandler );
+    m_xParser->parseStream( structSource );
+}
+
+// Only stored here; it is handed to the CallbackDocumentHandler created by the
+// next parseStream() call.
+void SaxLegacyFastParser::setDocumentHandler( const Reference< XDocumentHandler > & xHandler )
+{
+ m_xDocumentHandler = xHandler;
+}
+
+// Delegated to the wrapped fast parser.
+void SaxLegacyFastParser::setErrorHandler( const Reference< XErrorHandler > & xHandler )
+{
+ m_xParser->setErrorHandler( xHandler );
+}
+
+// Intentionally a no-op: the handler is ignored.
+void SaxLegacyFastParser::setDTDHandler( const Reference < XDTDHandler > &/* xHandler */ )
+{
+
+}
+
+// Delegated to the wrapped fast parser.
+void SaxLegacyFastParser::setEntityResolver( const Reference< XEntityResolver >& xResolver )
+{
+ m_xParser->setEntityResolver( xResolver );
+}
+
+// Delegated to the wrapped fast parser.
+void SaxLegacyFastParser::setLocale( const Locale &locale )
+{
+ m_xParser->setLocale( locale );
+}
+
+// XServiceInfo: fixed implementation name of this component.
+OUString SaxLegacyFastParser::getImplementationName()
+{
+ return "com.sun.star.comp.extensions.xml.sax.LegacyFastParser";
+}
+
+// XServiceInfo: answered by the generic cppu helper, which is given this object.
+sal_Bool SaxLegacyFastParser::supportsService(const OUString& ServiceName)
+{
+ return cppu::supportsService(this, ServiceName);
+}
+
+// XServiceInfo: the single service this implementation provides.
+Sequence< OUString > SaxLegacyFastParser::getSupportedServiceNames()
+{
+ return { "com.sun.star.xml.sax.LegacyFastParser" };
+}
+
+} //namespace
+
+// Exported C entry point that creates a new SaxLegacyFastParser and returns it
+// acquired; the context and argument parameters are unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_comp_extensions_xml_sax_LegacyFastParser_get_implementation(
+ css::uno::XComponentContext *,
+ css::uno::Sequence<css::uno::Any> const &)
+{
+ return cppu::acquire(new SaxLegacyFastParser);
+}
+
+ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx
new file mode 100644
index 000000000..7d9e514c3
--- /dev/null
+++ b/sax/source/tools/CachedOutputStream.hxx
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX
+#define INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX
+
+#include <sal/types.h>
+
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/uno/Sequence.hxx>
+
+#include <cstring>
+#include <memory>
+
+namespace sax_fastparser {
+
+/// Interface for an alternative sink that receives flushed cache contents.
+class ForMergeBase
+{
+public:
+    virtual ~ForMergeBase() = default;
+
+    /// Receives one chunk of output bytes.
+    virtual void append( const css::uno::Sequence<sal_Int8>& rWhat ) = 0;
+};
+
+/** Buffers written bytes in a fixed-size cache and passes them on in large
+    chunks, either to an XOutputStream or to a ForMergeBase sink.
+
+    The cache is a Sequence whose raw uno_Sequence is written to directly
+    through pSeq; flush() patches its element count to the number of bytes
+    actually used before handing it on. */
+class CachedOutputStream
+{
+    /// When buffer hits this size, it's written to mxOutputStream
+    static const sal_Int32 mnMaximumSize = 0x100000; // 1Mbyte
+
+    /// ForMerge structure is used for sorting elements in Writer
+    std::shared_ptr< ForMergeBase > mpForMerge;
+    /// The cache; allocated once with mnMaximumSize elements.
+    const css::uno::Sequence<sal_Int8> mpCache;
+    /// Output stream, usually writing data into files.
+    css::uno::Reference< css::io::XOutputStream > mxOutputStream;
+    /// Raw handle of mpCache, used for direct element access.
+    uno_Sequence *pSeq;
+    /// Number of cached bytes not yet flushed.
+    sal_Int32 mnCacheWrittenSize;
+    /// true: flush to mxOutputStream; false: flush to mpForMerge.
+    bool mbWriteToOutStream;
+
+public:
+    CachedOutputStream() : mpCache(mnMaximumSize)
+                         , pSeq(mpCache.get())
+                         , mnCacheWrittenSize(0)
+                         , mbWriteToOutStream(true)
+    {}
+
+    const css::uno::Reference< css::io::XOutputStream >& getOutputStream() const
+    {
+        return mxOutputStream;
+    }
+
+    void setOutputStream( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
+    {
+        mxOutputStream = xOutputStream;
+    }
+
+    /// Flushes what is cached to the current target, then redirects to pForMerge.
+    void setOutput( std::shared_ptr< ForMergeBase > pForMerge )
+    {
+        flush();
+        mbWriteToOutStream = false;
+        mpForMerge = pForMerge;
+    }
+
+    /// Flushes what is cached to the current target, then switches back to the stream.
+    void resetOutputToStream()
+    {
+        flush();
+        mbWriteToOutStream = true;
+        mpForMerge.reset();
+    }
+
+    /// cache string and if limit is hit, flush
+    void writeBytes( const sal_Int8* pStr, sal_Int32 nLen )
+    {
+        // Nothing to store. This also keeps a negative length from reaching
+        // memcpy, where it would be converted to a huge unsigned size.
+        if (nLen <= 0)
+            return;
+
+        // Write when the buffer gets big enough. Expressed as "does not fit
+        // into the remaining space" so that the test cannot overflow
+        // sal_Int32 for very large nLen.
+        if (nLen > mnMaximumSize - mnCacheWrittenSize)
+        {
+            flush();
+
+            // Writer does some elements sorting, so it can accumulate
+            // pretty big strings in FastSaxSerializer::ForMerge.
+            // In that case, just flush data and write immediately.
+            if (nLen > mnMaximumSize)
+            {
+                if (mbWriteToOutStream)
+                    mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+                else
+                    mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+                return;
+            }
+        }
+
+        memcpy(pSeq->elements + mnCacheWrittenSize, pStr, nLen);
+        mnCacheWrittenSize += nLen;
+    }
+
+    /// immediately write buffer into mxOutputStream and clear
+    void flush()
+    {
+        // resize the Sequence to written size
+        pSeq->nElements = mnCacheWrittenSize;
+        if (mbWriteToOutStream)
+            mxOutputStream->writeBytes( mpCache );
+        else
+            mpForMerge->append( mpCache );
+        // and next time write to the beginning
+        mnCacheWrittenSize = 0;
+    }
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx
new file mode 100644
index 000000000..2abfe3575
--- /dev/null
+++ b/sax/source/tools/converter.cxx
@@ -0,0 +1,2535 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sax/tools/converter.hxx>
+
+#include <com/sun/star/i18n/UnicodeType.hpp>
+#include <com/sun/star/util/DateTime.hpp>
+#include <com/sun/star/util/Date.hpp>
+#include <com/sun/star/util/Duration.hpp>
+#include <com/sun/star/util/Time.hpp>
+#include <optional>
+
+#include <rtl/ustrbuf.hxx>
+#include <rtl/math.hxx>
+#include <rtl/character.hxx>
+#include <sal/log.hxx>
+#include <o3tl/typed_flags_set.hxx>
+#include <o3tl/unit_conversion.hxx>
+#include <osl/diagnose.h>
+#include <tools/long.hxx>
+
+#include <algorithm>
+#include <string_view>
+
+using namespace com::sun::star;
+using namespace com::sun::star::uno;
+using namespace com::sun::star::util;
+using namespace ::com::sun::star::i18n;
+
+
+namespace sax {
+
+// Unit suffixes used when writing measures as strings.
+const std::string_view gpsMM = "mm";
+const std::string_view gpsCM = "cm";
+const std::string_view gpsPT = "pt";
+const std::string_view gpsINCH = "in";
+const std::string_view gpsPC = "pc";
+
+// NOTE(review): not referenced in this part of the file; presumably a digit
+// limit for time values - confirm at its use site.
+const sal_Int8 XML_MAXDIGITSCOUNT_TIME = 14;
+
+// Overload pair that lets templated callers parse an integer from either a
+// UTF-16 or a char buffer of given length with the same call.
+static sal_Int64 toInt64_WithLength(const sal_Unicode * str, sal_Int16 radix, sal_Int32 nStrLength )
+{
+ return rtl_ustr_toInt64_WithLength(str, radix, nStrLength);
+}
+// char variant of the overload above.
+static sal_Int64 toInt64_WithLength(const char * str, sal_Int16 radix, sal_Int32 nStrLength )
+{
+ return rtl_str_toInt64_WithLength(str, radix, nStrLength);
+}
+
+namespace
+{
+/** Maps a MeasureUnit constant to the matching o3tl::Length unit. Units that
+    are not handled are reported with a warning and treated as inches. */
+o3tl::Length Measure2O3tlUnit(sal_Int16 nUnit)
+{
+    switch (nUnit)
+    {
+        case MeasureUnit::TWIP:     return o3tl::Length::twip;
+        case MeasureUnit::POINT:    return o3tl::Length::pt;
+        case MeasureUnit::MM_10TH:  return o3tl::Length::mm10;
+        case MeasureUnit::MM_100TH: return o3tl::Length::mm100;
+        case MeasureUnit::MM:       return o3tl::Length::mm;
+        case MeasureUnit::CM:       return o3tl::Length::cm;
+        case MeasureUnit::INCH:     return o3tl::Length::in;
+        default:
+            break;
+    }
+
+    SAL_WARN("sax", "unit not supported for length");
+    return o3tl::Length::in;
+}
+
+/** Returns the unit suffix for a MeasureUnit constant: empty for the 1/10 and
+    1/100 mm units, "in" for inches and for every unit not listed. */
+std::string_view Measure2UnitString(sal_Int16 nUnit)
+{
+    switch (nUnit)
+    {
+        case MeasureUnit::MM:
+            return gpsMM;
+        case MeasureUnit::CM:
+            return gpsCM;
+        case MeasureUnit::POINT:
+            return gpsPT;
+        case MeasureUnit::TWIP:
+            return gpsPC; // ??
+        case MeasureUnit::MM_10TH:
+        case MeasureUnit::MM_100TH:
+            return std::string_view();
+        default: // includes MeasureUnit::INCH
+            return gpsINCH;
+    }
+}
+
+/** Checks that string begins with expected (string is ASCII-lowercased for
+    the comparison, expected is taken as is) and that the match is followed
+    either by the end of string or by a space. */
+template <typename V> bool wordEndsWith(V string, std::string_view expected)
+{
+    const auto nExpected = expected.size();
+    const V aHead = string.substr(0, nExpected);
+    const bool bSameLetters
+        = std::equal(aHead.begin(), aHead.end(), expected.begin(), expected.end(),
+                     [](sal_uInt32 c1, sal_uInt32 c2) { return rtl::toAsciiLowerCase(c1) == c2; });
+    if (!bSameLetters)
+        return false;
+    return string.size() == nExpected || string[nExpected] == ' ';
+}
+
+}
+
+/** convert string to measure using optional min and max values
+
+ Accepts: optional leading whitespace, optional '-', decimal digits with an
+ optional '.' fraction, optional whitespace, and an optional unit.
+ A number without any unit is taken as already being in the target unit.
+ The result is rounded, clamped to [nMin, nMax] and stored in rValue.
+
+ @return false if a unit is present but not accepted for nTargetUnit,
+ true otherwise
+*/
+template<typename V>
+static bool lcl_convertMeasure( sal_Int32& rValue,
+ V rString,
+ sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+ sal_Int32 nMin /* = SAL_MIN_INT32 */,
+ sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+{
+ bool bNeg = false;
+ double nVal = 0;
+
+ sal_Int32 nPos = 0;
+ sal_Int32 const nLen = rString.size();
+
+ // skip white space
+ while( (nPos < nLen) && (rString[nPos] <= ' ') )
+ nPos++;
+
+ if( nPos < nLen && '-' == rString[nPos] )
+ {
+ bNeg = true;
+ nPos++;
+ }
+
+ // get number
+ while( nPos < nLen &&
+ '0' <= rString[nPos] &&
+ '9' >= rString[nPos] )
+ {
+ // TODO: check overflow!
+ nVal *= 10;
+ nVal += (rString[nPos] - '0');
+ nPos++;
+ }
+ // optional fractional part
+ if( nPos < nLen && '.' == rString[nPos] )
+ {
+ nPos++;
+ double nDiv = 1.;
+
+ while( nPos < nLen &&
+ '0' <= rString[nPos] &&
+ '9' >= rString[nPos] )
+ {
+ // TODO: check overflow!
+ nDiv *= 10;
+ nVal += ( static_cast<double>(rString[nPos] - '0') / nDiv );
+ nPos++;
+ }
+ }
+
+ // skip white space
+ while( (nPos < nLen) && (rString[nPos] <= ' ') )
+ nPos++;
+
+ // anything left over must be a unit that fits the target unit
+ if( nPos < nLen )
+ {
+
+ if( MeasureUnit::PERCENT == nTargetUnit )
+ {
+ if( '%' != rString[nPos] )
+ return false;
+ }
+ else if( MeasureUnit::PIXEL == nTargetUnit )
+ {
+ // requires "px" in any letter case
+ if( nPos + 1 >= nLen ||
+ ('p' != rString[nPos] &&
+ 'P' != rString[nPos])||
+ ('x' != rString[nPos+1] &&
+ 'X' != rString[nPos+1]) )
+ return false;
+ }
+ else
+ {
+ OSL_ENSURE( MeasureUnit::TWIP == nTargetUnit || MeasureUnit::POINT == nTargetUnit ||
+ MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit ||
+ MeasureUnit::PIXEL == nTargetUnit, "unit is not supported");
+
+ // stays invalid unless a known unit is recognized below
+ o3tl::Length eFrom = o3tl::Length::invalid;
+
+ // target twip: accepts cm, in, mm, pt, pc
+ if( MeasureUnit::TWIP == nTargetUnit )
+ {
+ switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+ {
+ case u'c':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::cm;
+ break;
+ case u'i':
+ if (wordEndsWith(rString.substr(nPos + 1), "n"))
+ eFrom = o3tl::Length::in;
+ break;
+ case u'm':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::mm;
+ break;
+ case u'p':
+ if (wordEndsWith(rString.substr(nPos + 1), "t"))
+ eFrom = o3tl::Length::pt;
+ else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+ eFrom = o3tl::Length::pc;
+ break;
+ }
+ }
+ // target 1/100 mm or 1/10 mm: accepts cm, in, mm, pt, pc and px
+ else if( MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit )
+ {
+ switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+ {
+ case u'c':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::cm;
+ break;
+ case u'i':
+ if (wordEndsWith(rString.substr(nPos + 1), "n"))
+ eFrom = o3tl::Length::in;
+ break;
+ case u'm':
+ if (wordEndsWith(rString.substr(nPos + 1), "m"))
+ eFrom = o3tl::Length::mm;
+ break;
+ case u'p':
+ if (wordEndsWith(rString.substr(nPos + 1), "t"))
+ eFrom = o3tl::Length::pt;
+ else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+ eFrom = o3tl::Length::pc;
+ else if (wordEndsWith(rString.substr(nPos + 1), "x"))
+ eFrom = o3tl::Length::px;
+ break;
+ }
+ }
+ // target point: accepts pt only
+ else if( MeasureUnit::POINT == nTargetUnit )
+ {
+ if (wordEndsWith(rString.substr(nPos), "pt"))
+ eFrom = o3tl::Length::pt;
+ }
+
+ if (eFrom == o3tl::Length::invalid)
+ return false;
+
+ // TODO: check overflow
+ nVal = o3tl::convert(nVal, eFrom, Measure2O3tlUnit(nTargetUnit));
+ }
+ }
+
+ // round on the magnitude: the .5 is added before the sign is applied and
+ // the cast below truncates
+ nVal += .5;
+ if( bNeg )
+ nVal = -nVal;
+
+ // clamp to the requested range
+ if( nVal <= static_cast<double>(nMin) )
+ rValue = nMin;
+ else if( nVal >= static_cast<double>(nMax) )
+ rValue = nMax;
+ else
+ rValue = static_cast<sal_Int32>(nVal);
+
+ return true;
+}
+
+/** convert string to measure using optional min and max values
+ (UTF-16 input; forwards to lcl_convertMeasure) */
+bool Converter::convertMeasure( sal_Int32& rValue,
+ std::u16string_view rString,
+ sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+ sal_Int32 nMin /* = SAL_MIN_INT32 */,
+ sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+{
+ return lcl_convertMeasure(rValue, rString, nTargetUnit, nMin, nMax);
+}
+
+/** convert string to measure using optional min and max values
+ (char input; forwards to lcl_convertMeasure) */
+bool Converter::convertMeasure( sal_Int32& rValue,
+ std::string_view rString,
+ sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+ sal_Int32 nMin /* = SAL_MIN_INT32 */,
+ sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+{
+ return lcl_convertMeasure(rValue, rString, nTargetUnit, nMin, nMax);
+}
+
+
+/** convert measure in given unit to string with given unit
+
+ Appends the sign, the integer part, up to a unit-dependent number of
+ decimals (trailing zeros are not written) and the unit suffix to rBuffer.
+ A PERCENT source is written as the plain number followed by '%'.
+*/
+void Converter::convertMeasure( OUStringBuffer& rBuffer,
+ sal_Int32 nMeasure,
+ sal_Int16 nSourceUnit /* = MeasureUnit::MM_100TH */,
+ sal_Int16 nTargetUnit /* = MeasureUnit::INCH */ )
+{
+ if( nSourceUnit == MeasureUnit::PERCENT )
+ {
+ OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT,
+ "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" );
+
+ rBuffer.append( nMeasure );
+ rBuffer.append( '%' );
+
+ return;
+ }
+ sal_Int64 nValue(nMeasure); // extend to 64-bit first to avoid overflow
+ // the sign is processed separately
+ if (nValue < 0)
+ {
+ nValue = -nValue;
+ rBuffer.append( '-' );
+ }
+
+ o3tl::Length eFrom = o3tl::Length::in, eTo = o3tl::Length::in;
+ int nFac = 100; // used to get specific number of decimals (2 by default)
+ std::string_view psUnit;
+ switch( nSourceUnit )
+ {
+ case MeasureUnit::TWIP:
+ eFrom = o3tl::Length::twip;
+ switch( nTargetUnit )
+ {
+ case MeasureUnit::MM_100TH:
+ case MeasureUnit::MM_10TH:
+ // this check cannot hold in these two cases, so it reports the
+ // unsupported target; output then falls back to mm
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,"output unit not supported for twip values" );
+ [[fallthrough]];
+ case MeasureUnit::MM:
+ eTo = o3tl::Length::mm;
+ nFac = 100;
+ psUnit = gpsMM;
+ break;
+
+ case MeasureUnit::CM:
+ eTo = o3tl::Length::cm;
+ nFac = 1000;
+ psUnit = gpsCM;
+ break;
+
+ case MeasureUnit::POINT:
+ eTo = o3tl::Length::pt;
+ nFac = 100;
+ psUnit = gpsPT;
+ break;
+
+ case MeasureUnit::INCH:
+ default:
+ // eTo keeps its initial value (inch)
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,
+ "output unit not supported for twip values" );
+ nFac = 10000;
+ psUnit = gpsINCH;
+ break;
+ }
+ break;
+
+ case MeasureUnit::POINT:
+ // 1pt = 1pt (exactly)
+ OSL_ENSURE( MeasureUnit::POINT == nTargetUnit,
+ "output unit not supported for pt values" );
+ eFrom = eTo = o3tl::Length::pt;
+ nFac = 1;
+ psUnit = gpsPT;
+ break;
+ case MeasureUnit::MM_10TH:
+ case MeasureUnit::MM_100TH:
+ {
+ // decimals scale with the precision of the source unit
+ int nFac2 = (MeasureUnit::MM_100TH == nSourceUnit) ? 100 : 10;
+ eFrom = Measure2O3tlUnit(nSourceUnit);
+ switch( nTargetUnit )
+ {
+ case MeasureUnit::MM_100TH:
+ case MeasureUnit::MM_10TH:
+ // as above: reports the unsupported target, then writes mm
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,
+ "output unit not supported for 1/100mm values" );
+ [[fallthrough]];
+ case MeasureUnit::MM:
+ eTo = o3tl::Length::mm;
+ nFac = nFac2;
+ psUnit = gpsMM;
+ break;
+
+ case MeasureUnit::CM:
+ eTo = o3tl::Length::cm;
+ nFac = 10*nFac2;
+ psUnit = gpsCM;
+ break;
+
+ case MeasureUnit::POINT:
+ eTo = o3tl::Length::pt;
+ nFac = nFac2;
+ psUnit = gpsPT;
+ break;
+
+ case MeasureUnit::INCH:
+ default:
+ // eTo keeps its initial value (inch)
+ OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,
+ "output unit not supported for 1/100mm values" );
+ nFac = 100*nFac2;
+ psUnit = gpsINCH;
+ break;
+ }
+ break;
+ }
+ default:
+ OSL_ENSURE(false, "sax::Converter::convertMeasure(): "
+ "source unit not supported");
+ break;
+ }
+
+ // convert the value pre-scaled by nFac so the decimals survive the
+ // integer conversion
+ nValue = o3tl::convert(nValue * nFac, eFrom, eTo);
+
+ // integer part
+ rBuffer.append( static_cast<sal_Int64>(nValue / nFac) );
+ if (nFac > 1 && (nValue % nFac) != 0)
+ {
+ rBuffer.append( '.' );
+ // one decimal digit per round, until the remainder is zero
+ while (nFac > 1 && (nValue % nFac) != 0)
+ {
+ nFac /= 10;
+ rBuffer.append( static_cast<sal_Int32>((nValue / nFac) % 10) );
+ }
+ }
+
+ if (psUnit.length() > 0)
+ rBuffer.appendAscii(psUnit.data(), psUnit.length());
+}
+
+/** convert string to boolean: rBool is set to whether rString is exactly
+    "true"; returns whether rString was "true" or "false" */
+bool Converter::convertBool( bool& rBool, std::u16string_view rString )
+{
+    const bool bIsTrue = ( rString == u"true" );
+    rBool = bIsTrue;
+    if ( bIsTrue )
+        return true;
+    return rString == u"false";
+}
+
+/** convert string to boolean: rBool is set to whether rString is exactly
+    "true"; returns whether rString was "true" or "false" */
+bool Converter::convertBool( bool& rBool, std::string_view rString )
+{
+    const bool bIsTrue = ( rString == "true" );
+    rBool = bIsTrue;
+    if ( bIsTrue )
+        return true;
+    return rString == "false";
+}
+
+/** convert boolean to string (appended to rBuffer using the buffer's own
+ bool overload) */
+void Converter::convertBool( OUStringBuffer& rBuffer, bool bValue )
+{
+ rBuffer.append( bValue );
+}
+
+/** convert string to percent (convertMeasure with MeasureUnit::PERCENT) */
+bool Converter::convertPercent( sal_Int32& rPercent, std::u16string_view rString )
+{
+ return convertMeasure( rPercent, rString, MeasureUnit::PERCENT );
+}
+
+/** convert string to percent (convertMeasure with MeasureUnit::PERCENT) */
+bool Converter::convertPercent( sal_Int32& rPercent, std::string_view rString )
+{
+ return convertMeasure( rPercent, rString, MeasureUnit::PERCENT );
+}
+
+/** convert percent to string: appends the number followed by '%' */
+void Converter::convertPercent( OUStringBuffer& rBuffer, sal_Int32 nValue )
+{
+ rBuffer.append( nValue );
+ rBuffer.append( '%' );
+}
+
+/** convert string to pixel measure (convertMeasure with MeasureUnit::PIXEL) */
+bool Converter::convertMeasurePx( sal_Int32& rPixel, std::u16string_view rString )
+{
+ return convertMeasure( rPixel, rString, MeasureUnit::PIXEL );
+}
+
+/** convert string to pixel measure (convertMeasure with MeasureUnit::PIXEL) */
+bool Converter::convertMeasurePx( sal_Int32& rPixel, std::string_view rString )
+{
+ return convertMeasure( rPixel, rString, MeasureUnit::PIXEL );
+}
+
+/** convert pixel measure to string: appends the number followed by "px" */
+void Converter::convertMeasurePx( OUStringBuffer& rBuffer, sal_Int32 nValue )
+{
+ rBuffer.append( nValue );
+ rBuffer.append( 'p' );
+ rBuffer.append( 'x' );
+}
+
+/** Returns the value (0..15) of a hexadecimal digit in either letter case;
+    any other character yields 0. */
+static int lcl_gethex( int nChar )
+{
+    if( '0' <= nChar && nChar <= '9' )
+        return nChar - '0';
+    if( 'a' <= nChar && nChar <= 'f' )
+        return 10 + ( nChar - 'a' );
+    if( 'A' <= nChar && nChar <= 'F' )
+        return 10 + ( nChar - 'A' );
+    return 0;
+}
+
+/** convert string to rgb color: expects exactly "#rrggbb"; characters that
+    are not hex digits count as 0 */
+template<typename V>
+static bool lcl_convertColor( sal_Int32& rColor, V rValue )
+{
+    if( rValue.size() != 7 || rValue[0] != '#' )
+        return false;
+
+    const int nRed   = lcl_gethex( rValue[1] ) * 16 + lcl_gethex( rValue[2] );
+    const int nGreen = lcl_gethex( rValue[3] ) * 16 + lcl_gethex( rValue[4] );
+    const int nBlue  = lcl_gethex( rValue[5] ) * 16 + lcl_gethex( rValue[6] );
+
+    rColor = ( ( ( nRed << 8 ) | nGreen ) << 8 ) | nBlue;
+    return true;
+}
+
+/** convert string to rgb color (UTF-16 input; forwards to lcl_convertColor) */
+bool Converter::convertColor( sal_Int32& rColor, std::u16string_view rValue )
+{
+ return lcl_convertColor(rColor, rValue);
+}
+
+/** convert string to rgb color (char input; forwards to lcl_convertColor) */
+bool Converter::convertColor( sal_Int32& rColor, std::string_view rValue )
+{
+ return lcl_convertColor(rColor, rValue);
+}
+
+const char aHexTab[] = "0123456789abcdef";
+
+/** convert color to string: appends '#' and two lowercase hex digits for
+    each of the three low bytes of nColor, highest byte first */
+void Converter::convertColor( OUStringBuffer& rBuffer, sal_Int32 nColor )
+{
+    rBuffer.append( '#' );
+
+    static constexpr int aShifts[] = { 16, 8, 0 };
+    for ( const int nShift : aShifts )
+    {
+        const sal_uInt8 nChannel = static_cast<sal_uInt8>(nColor >> nShift);
+        rBuffer.append( sal_Unicode( aHexTab[ nChannel >> 4 ] ) );
+        rBuffer.append( sal_Unicode( aHexTab[ nChannel & 0xf ] ) );
+    }
+}
+
+/** convert string to number with optional min and max values
+ (UTF-16 input). rValue is 0 on failure. */
+bool Converter::convertNumber( sal_Int32& rValue,
+ std::u16string_view aString,
+ sal_Int32 nMin, sal_Int32 nMax )
+{
+ rValue = 0;
+ sal_Int64 nNumber = 0;
+ // parse as 64-bit with the 32-bit limits passed as min and max
+ bool bRet = convertNumber64(nNumber,aString,nMin,nMax);
+ if ( bRet )
+ rValue = static_cast<sal_Int32>(nNumber);
+ return bRet;
+}
+
+/** convert string to number with optional min and max values
+ (char input). rValue is 0 on failure. */
+bool Converter::convertNumber( sal_Int32& rValue,
+ std::string_view aString,
+ sal_Int32 nMin, sal_Int32 nMax )
+{
+ rValue = 0;
+ sal_Int64 nNumber = 0;
+ // parse as 64-bit with the 32-bit limits passed as min and max
+ bool bRet = convertNumber64(nNumber,aString,nMin,nMax);
+ if ( bRet )
+ rValue = static_cast<sal_Int32>(nNumber);
+ return bRet;
+}
+
+/** Parse an optionally signed decimal integer and clamp it to [nMin, nMax].
+ *
+ * Leading characters up to and including ' ' are skipped, then an optional
+ * '-' and a run of digits are handed to toInt64_WithLength().  rValue always
+ * receives the clamped result; the return value is true only when the digits
+ * ran to the very end of the string.
+ */
+template<typename V>
+static bool lcl_convertNumber64( sal_Int64& rValue,
+                                 V aString,
+                                 sal_Int64 nMin, sal_Int64 nMax )
+{
+    sal_Int32 const nEnd = aString.size();
+    sal_Int32 nIdx = 0;
+
+    // leading blanks
+    for( ; nIdx < nEnd && aString[nIdx] <= ' '; ++nIdx )
+        ;
+
+    sal_Int32 const nFirst = nIdx;
+
+    // optional sign
+    if( nIdx < nEnd && aString[nIdx] == '-' )
+        ++nIdx;
+
+    // digits
+    for( ; nIdx < nEnd && aString[nIdx] >= '0' && aString[nIdx] <= '9'; ++nIdx )
+        ;
+
+    sal_Int64 const nParsed =
+        toInt64_WithLength(aString.data() + nFirst, 10, nIdx - nFirst);
+
+    // clamp into the requested range
+    rValue = (nParsed < nMin) ? nMin : ((nParsed > nMax) ? nMax : nParsed);
+
+    return nIdx == nEnd && rValue >= nMin && rValue <= nMax;
+}
+
+/** Parse a decimal number from UTF-16 text into a 64-bit integer.
+ *
+ * Forwards to lcl_convertNumber64(), which clamps the value to [nMin, nMax].
+ */
+bool Converter::convertNumber64( sal_Int64& rValue,
+                                 std::u16string_view aString,
+                                 sal_Int64 nMin, sal_Int64 nMax )
+{
+    return lcl_convertNumber64<std::u16string_view>(rValue, aString, nMin, nMax);
+}
+
+/** Parse a decimal number from 8-bit text into a 64-bit integer.
+ *
+ * Forwards to lcl_convertNumber64(), which clamps the value to [nMin, nMax].
+ */
+bool Converter::convertNumber64( sal_Int64& rValue,
+                                 std::string_view aString,
+                                 sal_Int64 nMin, sal_Int64 nMax )
+{
+    return lcl_convertNumber64<std::string_view>(rValue, aString, nMin, nMax);
+}
+
+
+/** Append fNumber to rBuffer, converted from nSourceUnit to nTargetUnit.
+ *
+ * Percent values are written as they are and get a '%' suffix; every other
+ * unit is scaled by GetConversionFactor() and gets the unit text that call
+ * returns.  The suffix is only written when bWriteUnits is set.
+ */
+void Converter::convertDouble( OUStringBuffer& rBuffer,
+                               double fNumber,
+                               bool bWriteUnits,
+                               sal_Int16 nSourceUnit,
+                               sal_Int16 nTargetUnit)
+{
+    if(MeasureUnit::PERCENT != nSourceUnit)
+    {
+        OUStringBuffer sUnit;
+        const double fFactor = GetConversionFactor(sUnit, nSourceUnit, nTargetUnit);
+        if(fFactor != 1.0)
+            fNumber *= fFactor;
+        ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true);
+        if(bWriteUnits)
+            rBuffer.append(sUnit);
+        return;
+    }
+
+    // percent is never rescaled
+    OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT, "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" );
+    ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true);
+    if(bWriteUnits)
+        rBuffer.append('%');
+}
+
+/** Append fNumber to rBuffer using ::rtl::math formatting.
+ *
+ * Uses the automatic string format, the maximum number of decimal places and
+ * '.' as decimal separator.
+ */
+void Converter::convertDouble( OUStringBuffer& rBuffer, double fNumber)
+{
+    const sal_Unicode cDecimalSeparator = '.';
+    const bool bEraseTrailingDecZeros = true;
+    ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber,
+        rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max,
+        cDecimalSeparator, bEraseTrailingDecZeros);
+}
+
+/** Parse a double from UTF-16 text and rescale it between measure units.
+ *
+ * Returns false when the plain number cannot be parsed.  Otherwise rValue is
+ * divided by the conversion factor unless that factor is 1.0 or 0.0.
+ */
+bool Converter::convertDouble(double& rValue,
+    std::u16string_view rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit)
+{
+    const bool bParsed = convertDouble(rValue, rString);
+    if (bParsed)
+    {
+        OUStringBuffer sUnit;
+        // fdo#48969: switch source and target because factor is used to divide!
+        double const fFactor =
+            GetConversionFactor(sUnit, nTargetUnit, nSourceUnit);
+        const bool bIdentityOrZero = (fFactor == 1.0 || fFactor == 0.0);
+        if (!bIdentityOrZero)
+            rValue /= fFactor;
+    }
+    return bParsed;
+}
+
+/** Parse a double from 8-bit text and rescale it between measure units.
+ *
+ * Returns false when the plain number cannot be parsed.  Otherwise rValue is
+ * divided by the conversion factor unless that factor is 1.0 or 0.0.
+ */
+bool Converter::convertDouble(double& rValue,
+    std::string_view rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit)
+{
+    const bool bParsed = convertDouble(rValue, rString);
+    if (bParsed)
+    {
+        OStringBuffer sUnit;
+        // fdo#48969: switch source and target because factor is used to divide!
+        double const fFactor =
+            GetConversionFactor(sUnit, nTargetUnit, nSourceUnit);
+        const bool bIdentityOrZero = (fFactor == 1.0 || fFactor == 0.0);
+        if (!bIdentityOrZero)
+            rValue /= fFactor;
+    }
+    return bParsed;
+}
+
+/** Parse a double from UTF-16 text with '.' as decimal and ',' as group separator.
+ *
+ * rValue always receives what rtl_math_uStringToDouble() returns; the result
+ * is true only when the reported status is rtl_math_ConversionStatus_Ok.
+ */
+bool Converter::convertDouble(double& rValue, std::u16string_view rString)
+{
+    auto const pBegin = rString.data();
+    auto const pEnd = pBegin + rString.size();
+    rtl_math_ConversionStatus eStatus;
+    rValue = rtl_math_uStringToDouble(pBegin, pEnd,
+                /*cDecSeparator*/'.', /*cGroupSeparator*/',',
+                &eStatus, nullptr);
+    return eStatus == rtl_math_ConversionStatus_Ok;
+}
+
+/** Parse a double from 8-bit text with '.' as decimal and ',' as group separator.
+ *
+ * rValue always receives what rtl_math_stringToDouble() returns; the result
+ * is true only when the reported status is rtl_math_ConversionStatus_Ok.
+ */
+bool Converter::convertDouble(double& rValue, std::string_view rString)
+{
+    auto const pBegin = rString.data();
+    auto const pEnd = pBegin + rString.size();
+    rtl_math_ConversionStatus eStatus;
+    rValue = rtl_math_stringToDouble(pBegin, pEnd,
+                /*cDecSeparator*/'.', /*cGroupSeparator*/',',
+                &eStatus, nullptr);
+    return eStatus == rtl_math_ConversionStatus_Ok;
+}
+
+/** Write an angle given in 10th of degrees as an SVG angle.
+ *
+ * For ODF versions before 1.2, and for the 1.2 extended-compat mode, the raw
+ * number is written unchanged; otherwise it is written in degrees with a
+ * "deg" suffix.
+ */
+void Converter::convertAngle(OUStringBuffer& rBuffer, sal_Int16 const nAngle,
+        SvtSaveOptions::ODFSaneDefaultVersion const nVersion)
+{
+    bool const bLegacyFormat = nVersion < SvtSaveOptions::ODFSVER_012
+        || nVersion == SvtSaveOptions::ODFSVER_012_EXT_COMPAT;
+    if (!bLegacyFormat)
+    {
+        // OFFICE-3774 tdf#89475 write valid ODF 1.2 angle; needs LO 4.4 to import
+        double const fDegrees(double(nAngle) / 10.0);
+        ::sax::Converter::convertDouble(rBuffer, fDegrees);
+        rBuffer.append("deg");
+        return;
+    }
+
+    // wrong, but backward compatible with OOo/LO < 4.4
+    rBuffer.append(static_cast<sal_Int32>(nAngle));
+}
+
+/** Shared implementation of the two convertAngle() parsers below.
+ *
+ * The numeric part of rString is parsed with convertDouble(); the unit is
+ * detected by searching rString for aDeg, aGrad and aRad (the literals "deg",
+ * "grad" and "rad" in the character type of V).  The value is scaled to 10th
+ * of degrees and normalised into [0..3599].
+ *
+ * @param rAngle  receives the result; only written when parsing succeeded
+ * @param isWrongOOo10thDegAngle  if true, a number without unit is already
+ *        taken to be in 10th of degrees instead of degrees
+ * @return false if the number could not be parsed, or if the scaled value
+ *         does not fit a sal_Int32 (this includes NaN)
+ */
+template<typename V>
+static bool lcl_convertAngle(sal_Int16& rAngle, V rString,
+        bool const isWrongOOo10thDegAngle,
+        V const aDeg, V const aGrad, V const aRad)
+{
+    // ODF 1.1 leaves it undefined what the number means, but ODF 1.2 says it's
+    // degrees, while OOo has historically used 10th of degrees :(
+    // So import degrees when we see the "deg" suffix but continue with 10th of
+    // degrees for now for the sake of existing OOo/LO documents, until the
+    // new versions that can read "deg" suffix are widely deployed and we can
+    // start to write the "deg" suffix.
+    double fValue(0.0);
+    bool const bRet = ::sax::Converter::convertDouble(fValue, rString);
+
+    double fTenthDeg(0.0);
+    if (V::npos != rString.find(aDeg))
+    {
+        fTenthDeg = fValue * 10.0;
+    }
+    else if (V::npos != rString.find(aGrad)) // test before "rad": "grad" contains it
+    {
+        fTenthDeg = (fValue * 9.0 / 10.0) * 10.0;
+    }
+    else if (V::npos != rString.find(aRad))
+    {
+        fTenthDeg = basegfx::rad2deg<10>(fValue);
+    }
+    else if (isWrongOOo10thDegAngle) // no explicit unit
+    {
+        fTenthDeg = fValue; // wrong, but backward compatible with OOo/LO < 7.0
+    }
+    else
+    {
+        fTenthDeg = fValue * 10.0; // ODF 1.2
+    }
+
+    // Converting a double that does not fit the integer type (or a NaN) is
+    // undefined behaviour, so refuse such input instead of converting it.
+    if (!(fTenthDeg >= SAL_MIN_INT32 && fTenthDeg <= SAL_MAX_INT32))
+    {
+        return false;
+    }
+
+    // truncate towards zero, then wrap into one full turn
+    sal_Int32 nValue = static_cast<sal_Int32>(fTenthDeg) % 3600;
+    if (nValue < 0)
+    {
+        nValue += 3600;
+    }
+    assert(0 <= nValue && nValue < 3600);
+    if (bRet)
+    {
+        rAngle = sal::static_int_cast<sal_Int16>(nValue);
+    }
+    return bRet;
+}
+
+/** convert SVG angle to number, 10th of degrees; a full turn wraps to 0 */
+bool Converter::convertAngle(sal_Int16& rAngle, std::u16string_view rString,
+        bool const isWrongOOo10thDegAngle)
+{
+    return lcl_convertAngle<std::u16string_view>(rAngle, rString,
+            isWrongOOo10thDegAngle, u"deg", u"grad", u"rad");
+}
+
+/** convert SVG angle to number, 10th of degrees; a full turn wraps to 0 */
+bool Converter::convertAngle(sal_Int16& rAngle, std::string_view rString,
+        bool const isWrongOOo10thDegAngle)
+{
+    return lcl_convertAngle<std::string_view>(rAngle, rString,
+            isWrongOOo10thDegAngle, "deg", "grad", "rad");
+}
+
+/** convert double to ISO "duration" string; negative durations allowed
+ *
+ * fTime is taken as a count of days: it is multiplied by 24 to get hours and
+ * then split further into minutes, seconds and a fractional second.  The text
+ * appended to rBuffer has the shape [-]PThhHmmMss[.fraction]S, where hours,
+ * minutes and seconds are zero-padded to at least two digits.
+ */
+void Converter::convertDuration(OUStringBuffer& rBuffer,
+ const double fTime)
+{
+ double fValue = fTime;
+
+ // take care of negative durations as specified in:
+ // XML Schema, W3C Working Draft 07 April 2000, section 3.2.6.1
+ if (fValue < 0.0)
+ {
+ rBuffer.append('-');
+ fValue = - fValue;
+ }
+
+ rBuffer.append( "PT" );
+ fValue *= 24; // days -> hours
+ double fHoursValue = ::rtl::math::approxFloor (fValue);
+ fValue -= fHoursValue;
+ fValue *= 60; // fraction of an hour -> minutes
+ double fMinsValue = ::rtl::math::approxFloor (fValue);
+ fValue -= fMinsValue;
+ fValue *= 60; // fraction of a minute -> seconds
+ double fSecsValue = ::rtl::math::approxFloor (fValue);
+ fValue -= fSecsValue; // what is left is the fractional second
+ double fNanoSecsValue;
+ if (fValue > 0.00000000001) // anything smaller is treated as no fraction at all
+ fNanoSecsValue = ::rtl::math::round( fValue, XML_MAXDIGITSCOUNT_TIME - 5);
+ else
+ fNanoSecsValue = 0.0;
+
+ if (fNanoSecsValue == 1.0) // the fraction rounded up to a whole second: carry it
+ {
+ fNanoSecsValue = 0.0;
+ fSecsValue += 1.0;
+ }
+ if (fSecsValue >= 60.0) // carry seconds into minutes
+ {
+ fSecsValue -= 60.0;
+ fMinsValue += 1.0;
+ }
+ if (fMinsValue >= 60.0) // carry minutes into hours
+ {
+ fMinsValue -= 60.0;
+ fHoursValue += 1.0;
+ }
+
+ if (fHoursValue < 10)
+ rBuffer.append( '0');
+ rBuffer.append( sal_Int32( fHoursValue));
+ rBuffer.append( 'H');
+ if (fMinsValue < 10)
+ rBuffer.append( '0');
+ rBuffer.append( sal_Int32( fMinsValue));
+ rBuffer.append( 'M');
+ if (fSecsValue < 10)
+ rBuffer.append( '0');
+ rBuffer.append( sal_Int32( fSecsValue));
+ if (fNanoSecsValue > 0.0)
+ {
+ OUString aNS( ::rtl::math::doubleToUString( fValue, // note: formats fValue, not fNanoSecsValue, with the same decimal count
+ rtl_math_StringFormat_F, XML_MAXDIGITSCOUNT_TIME - 5, '.',
+ true));
+ if ( aNS.getLength() > 2 )
+ {
+ rBuffer.append( '.');
+ rBuffer.append( aNS.subView(2) ); // strip "0."
+ }
+ }
+ rBuffer.append( 'S');
+}
+
+/** Strip leading and trailing ASCII whitespace from a UTF-16 view.
+ *
+ * Whitespace is space, tab, line feed, vertical tab, form feed and carriage
+ * return.  The test is done by hand because isspace() has undefined behaviour
+ * for values that do not fit an unsigned char, which most UTF-16 code units
+ * do not.
+ *
+ * @return a sub-view of `in`, or a default-constructed view when `in` is
+ *         empty or consists of whitespace only
+ */
+static std::u16string_view trim(std::u16string_view in) {
+    auto const isBlank = [](char16_t const c)
+        { return c == u' ' || (c >= u'\t' && c <= u'\r'); };
+
+    size_t nFirst = 0;
+    while (nFirst < in.size() && isBlank(in[nFirst]))
+        ++nFirst;
+    if (nFirst == in.size())
+        return std::u16string_view();
+
+    // in[nFirst] is not blank, so this loop stops at nFirst + 1 at the latest
+    size_t nEnd = in.size();
+    while (isBlank(in[nEnd - 1]))
+        --nEnd;
+    return in.substr(nFirst, nEnd - nFirst);
+}
+
+/** Strip leading and trailing ASCII whitespace from an 8-bit view.
+ *
+ * Whitespace is space, tab, line feed, vertical tab, form feed and carriage
+ * return.  The test is done by hand because passing a plain char to isspace()
+ * has undefined behaviour when the char is negative (any byte >= 0x80 where
+ * char is signed).
+ *
+ * @return a sub-view of `in`, or a default-constructed view when `in` is
+ *         empty or consists of whitespace only
+ */
+static std::string_view trim(std::string_view in) {
+    auto const isBlank = [](char const c)
+        { return c == ' ' || (c >= '\t' && c <= '\r'); };
+
+    size_t nFirst = 0;
+    while (nFirst < in.size() && isBlank(in[nFirst]))
+        ++nFirst;
+    if (nFirst == in.size())
+        return std::string_view();
+
+    // in[nFirst] is not blank, so this loop stops at nFirst + 1 at the latest
+    size_t nEnd = in.size();
+    while (isBlank(in[nEnd - 1]))
+        --nEnd;
+    return in.substr(nFirst, nEnd - nFirst);
+}
+
+/** helper function of Converter::convertDuration
+ *
+ * Parses an ISO duration such as "-P1DT2H3M4.5S" into a number of days.
+ * pStr is advanced character by character until a zero character is read, so
+ * it must point at zero-terminated text.  Year and month components are not
+ * supported and make the conversion fail.  rfTime is only written on success.
+ */
+template<typename V>
+static bool convertDurationHelper(double& rfTime, V pStr)
+{
+ // negative time duration?
+ bool bIsNegativeDuration = false;
+ if ( '-' == (*pStr) )
+ {
+ bIsNegativeDuration = true;
+ pStr++;
+ }
+
+ if ( *pStr != 'P' && *pStr != 'p' ) // duration must start with "P"
+ return false;
+ pStr++;
+
+ OUStringBuffer sDoubleStr; // collects the fractional seconds as the text "0.<digits>"
+ bool bSuccess = true;
+ bool bDone = false;
+ bool bTimePart = false; // set once 'T' has been seen
+ bool bIsFraction = false; // set once the decimal separator of the seconds has been seen
+ sal_Int32 nDays = 0;
+ sal_Int32 nHours = 0;
+ sal_Int32 nMins = 0;
+ sal_Int32 nSecs = 0;
+ sal_Int32 nTemp = 0; // number being accumulated until its designator letter arrives
+
+ while ( bSuccess && !bDone )
+ {
+ sal_Unicode c = *(pStr++);
+ if ( !c ) // end
+ bDone = true;
+ else if ( '0' <= c && '9' >= c )
+ {
+ if ( nTemp >= SAL_MAX_INT32 / 10 ) // another digit could overflow nTemp
+ bSuccess = false;
+ else
+ {
+ if ( !bIsFraction )
+ {
+ nTemp *= 10;
+ nTemp += (c - u'0');
+ }
+ else
+ {
+ sDoubleStr.append(c);
+ }
+ }
+ }
+ else if ( bTimePart )
+ {
+ if ( c == 'H' || c == 'h' )
+ {
+ nHours = nTemp;
+ nTemp = 0;
+ }
+ else if ( c == 'M' || c == 'm')
+ {
+ nMins = nTemp;
+ nTemp = 0;
+ }
+ else if ( (c == ',') || (c == '.') )
+ {
+ nSecs = nTemp; // the digits before the separator are the whole seconds
+ nTemp = 0;
+ bIsFraction = true;
+ sDoubleStr = "0.";
+ }
+ else if ( c == 'S' || c == 's' )
+ {
+ if ( !bIsFraction ) // with a fraction, nSecs was already stored at the separator
+ {
+ nSecs = nTemp;
+ nTemp = 0;
+ sDoubleStr = "0.0";
+ }
+ }
+ else
+ bSuccess = false; // invalid character
+ }
+ else
+ {
+ if ( c == 'T' || c == 't' ) // "T" starts time part
+ bTimePart = true;
+ else if ( c == 'D' || c == 'd')
+ {
+ nDays = nTemp;
+ nTemp = 0;
+ }
+ else if ( c == 'Y' || c == 'y' || c == 'M' || c == 'm' )
+ {
+ //! how many days is a year or month?
+
+ OSL_FAIL( "years or months in duration: not implemented");
+ bSuccess = false;
+ }
+ else
+ bSuccess = false; // invalid character
+ }
+ }
+
+ if ( bSuccess )
+ {
+ if ( nDays )
+ nHours += nDays * 24; // add the days to the hours part
+ double fHour = nHours;
+ double fMin = nMins;
+ double fSec = nSecs;
+ double fFraction = sDoubleStr.makeStringAndClear().toDouble();
+ double fTempTime = fHour / 24; // everything below is expressed in days
+ fTempTime += fMin / (24 * 60);
+ fTempTime += fSec / (24 * 60 * 60);
+ fTempTime += fFraction / (24 * 60 * 60);
+
+ // negative duration?
+ if ( bIsNegativeDuration )
+ {
+ fTempTime = -fTempTime;
+ }
+
+ rfTime = fTempTime;
+ }
+ return bSuccess;
+}
+
+/** convert ISO "duration" string to double; negative durations allowed
+ *
+ * Surrounding whitespace is ignored.  rfTime is only written when the text
+ * could be parsed.
+ *
+ * @return false for empty, blank or malformed input
+ */
+bool Converter::convertDuration(double& rfTime,
+                                std::string_view rString)
+{
+    // convertDurationHelper() scans until it reads a zero character.  A
+    // string_view is not zero-terminated - the trimmed sub-view in particular
+    // still has the stripped whitespace right behind it - and an empty view
+    // may carry a null data() pointer.  So hand the helper an owned,
+    // terminated copy rather than the view's raw pointer.
+    std::string const aTrimmed(trim(rString));
+
+    return convertDurationHelper(rfTime, aTrimmed.c_str());
+}
+
+/** Write a util::Duration as an ISO8601 "duration" string.
+ *
+ * Components that are zero are left out; a duration with no non-zero
+ * component at all is written as "P0D".  Nanoseconds are written as nine
+ * zero-padded digits after the seconds.
+ */
+void Converter::convertDuration(OUStringBuffer& rBuffer,
+        const ::util::Duration& rDuration)
+{
+    // writes "<number><designator>" for a component that is not zero
+    auto const appendIfSet = [&rBuffer](sal_Int32 const nValue, char const cDesignator)
+    {
+        if (nValue)
+        {
+            rBuffer.append(nValue);
+            rBuffer.append(cDesignator);
+        }
+    };
+
+    if (rDuration.Negative)
+    {
+        rBuffer.append('-');
+    }
+    rBuffer.append('P');
+    appendIfSet(rDuration.Years, 'Y');
+    appendIfSet(rDuration.Months, 'M');
+    appendIfSet(rDuration.Days, 'D');
+
+    bool const bHaveDate = rDuration.Years != 0
+        || rDuration.Months != 0
+        || rDuration.Days != 0;
+    bool const bHaveSeconds = rDuration.Seconds != 0 || rDuration.NanoSeconds != 0;
+    bool const bHaveTime = rDuration.Hours != 0
+        || rDuration.Minutes != 0
+        || bHaveSeconds;
+
+    if (!bHaveTime)
+    {
+        if (!bHaveDate)
+        {
+            // zero duration: XMLSchema-2 says there must be at least one component
+            rBuffer.append('0');
+            rBuffer.append('D');
+        }
+        return;
+    }
+
+    rBuffer.append('T'); // time separator
+    appendIfSet(rDuration.Hours, 'H');
+    appendIfSet(rDuration.Minutes, 'M');
+    if (!bHaveSeconds)
+    {
+        return;
+    }
+
+    // seconds must not be omitted (i.e. ".42S" is not valid)
+    rBuffer.append(static_cast<sal_Int32>(rDuration.Seconds));
+    if (rDuration.NanoSeconds)
+    {
+        OSL_ENSURE(rDuration.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999");
+        rBuffer.append('.');
+        std::ostringstream aDigits;
+        aDigits.fill('0');
+        aDigits.width(9);
+        aDigits << rDuration.NanoSeconds;
+        rBuffer.append(OUString::createFromAscii(aDigits.str().c_str()));
+    }
+    rBuffer.append('S');
+}
+
+namespace {
+
+enum Result { R_NOTHING, R_OVERFLOW, R_SUCCESS }; // outcome of reading a number: no digit found / value reached SAL_MAX_INT32 / value read fine
+
+}
+
+/** Read a run of decimal digits starting at io_rnPos.
+ *
+ * Without any digit, o_rNumber is set to -1, io_rnPos is left alone and
+ * R_NOTHING is returned.  Otherwise io_rnPos is moved behind the digits,
+ * o_rNumber receives the value, and the result is R_OVERFLOW when the value
+ * is SAL_MAX_INT32 or larger, R_SUCCESS if not.
+ */
+template <typename V>
+static Result
+readUnsignedNumber(V rString,
+    size_t & io_rnPos, sal_Int32 & o_rNumber)
+{
+    size_t const nBegin(io_rnPos);
+    size_t nEnd(nBegin);
+
+    for (; nEnd < rString.size(); ++nEnd)
+    {
+        const typename V::value_type c = rString[nEnd];
+        if (!(('0' <= c) && (c <= '9')))
+            break;
+    }
+
+    if (nEnd == nBegin) // nothing read
+    {
+        o_rNumber = -1;
+        return R_NOTHING;
+    }
+
+    const sal_Int64 nParsed = toInt64_WithLength(rString.data() + nBegin, 10, nEnd - nBegin);
+
+    io_rnPos = nEnd;
+    o_rNumber = nParsed;
+    return (nParsed >= SAL_MAX_INT32) ? R_OVERFLOW : R_SUCCESS;
+}
+
+/** Read a run of decimal digits but only evaluate the first maxDigits of them.
+ *
+ * Digits beyond maxDigits are consumed and ignored.  Without any digit,
+ * o_rNumber is set to -1, io_rnPos is left alone and R_NOTHING is returned.
+ * Otherwise io_rnPos is moved behind all digits and o_rNumber receives the
+ * value of the evaluated ones; R_OVERFLOW is reported if that value reached
+ * SAL_MAX_INT32 at any point.
+ */
+template<typename V>
+static Result
+readUnsignedNumberMaxDigits(int maxDigits,
+                            V rString, size_t & io_rnPos,
+                            sal_Int32 & o_rNumber)
+{
+    size_t nCur(io_rnPos);
+    sal_Int64 nAccum(0);
+    bool bTooBig(false);
+    OSL_ENSURE(maxDigits >= 0, "negative amount of digits makes no sense");
+
+    for (; nCur < rString.size(); ++nCur)
+    {
+        const sal_Unicode c = rString[nCur];
+        if ((c < '0') || ('9' < c))
+        {
+            break;
+        }
+        if (maxDigits <= 0)
+        {
+            continue; // surplus digit: skip it without evaluating
+        }
+        nAccum = nAccum * 10 + (c - u'0');
+        bTooBig = bTooBig || (nAccum >= SAL_MAX_INT32);
+        --maxDigits;
+    }
+
+    if (nCur == io_rnPos) // nothing read
+    {
+        o_rNumber = -1;
+        return R_NOTHING;
+    }
+
+    io_rnPos = nCur;
+    o_rNumber = nAccum;
+    return bTooBig ? R_OVERFLOW : R_SUCCESS;
+}
+
+/** Consume a 'T' or 't' at io_rnPos, if there is one.
+ *
+ * @return true when the character was there; io_rnPos then points behind it
+ */
+template<typename V>
+static bool
+readDurationT(V rString, size_t & io_rnPos)
+{
+    if (io_rnPos >= rString.size())
+    {
+        return false;
+    }
+    auto const c = rString[io_rnPos];
+    if (c != 'T' && c != 't')
+    {
+        return false;
+    }
+    ++io_rnPos;
+    return true;
+}
+
+/** Finish one duration component if its designator letter is at io_rnPos.
+ *
+ * If the character at io_rnPos is neither cLower nor cUpper, or the string
+ * is exhausted, nothing happens and true is returned.  Otherwise the pending
+ * number io_rnTemp is stored in o_rnTarget and reset to -1, an optional 'T'
+ * is consumed when the time part has not started yet, and the next number is
+ * read into io_rnTemp.
+ *
+ * @return false if the designator had no number in front of it, or if the
+ *         following number overflowed
+ */
+template<typename V>
+static bool
+readDurationComponent(V rString,
+    size_t & io_rnPos, sal_Int32 & io_rnTemp, bool & io_rbTimePart,
+    sal_Int32 & o_rnTarget, const sal_Unicode cLower, const sal_Unicode cUpper)
+{
+    if (io_rnPos >= rString.size())
+    {
+        return true;
+    }
+    if (cLower != rString[io_rnPos] && cUpper != rString[io_rnPos])
+    {
+        return true; // a different component; leave it to the caller
+    }
+
+    ++io_rnPos;
+    if (-1 == io_rnTemp)
+    {
+        return false; // designator without a number
+    }
+
+    o_rnTarget = io_rnTemp;
+    io_rnTemp = -1;
+    if (!io_rbTimePart)
+    {
+        io_rbTimePart = readDurationT(rString, io_rnPos);
+    }
+    return R_OVERFLOW != readUnsignedNumber(rString, io_rnPos, io_rnTemp);
+}
+
+/** Shared parser behind the two convertDuration(util::Duration&, ...) overloads.
+ *
+ * Accepts [-]P[nY][nM][nD][T[nH][nM][n[.fraction]S]] with designators in
+ * either case and '.' or ',' as decimal separator; surrounding whitespace is
+ * ignored.  At most nine fraction digits are evaluated (nanoseconds).
+ * rDuration is only written when the whole string was consumed successfully.
+ *
+ * NOTE(review): the components are narrowed with static_cast<sal_Int16>, so
+ * a component above the 16-bit range is truncated rather than rejected -
+ * confirm whether such input should fail instead.
+ */
+template<typename V>
+static bool lcl_convertDuration(util::Duration& rDuration, V rString)
+{
+    V const string = trim(rString);
+    size_t nPos(0);
+
+    bool bIsNegativeDuration(false);
+    if (!string.empty() && ('-' == string[0]))
+    {
+        bIsNegativeDuration = true;
+        ++nPos;
+    }
+
+    // duration must start with "P"; an exhausted string has none
+    if (nPos >= string.size()
+        || (string[nPos] != 'P' && string[nPos] != 'p'))
+    {
+        return false;
+    }
+
+    ++nPos;
+
+    /// last read number; -1 == no valid number! always reset after using!
+    sal_Int32 nTemp(-1);
+    bool bTimePart(false); // have we read 'T'?
+    bool bSuccess(false);
+    sal_Int32 nYears(0);
+    sal_Int32 nMonths(0);
+    sal_Int32 nDays(0);
+    sal_Int32 nHours(0);
+    sal_Int32 nMinutes(0);
+    sal_Int32 nSeconds(0);
+    sal_Int32 nNanoSeconds(0);
+
+    bTimePart = readDurationT(string, nPos);
+    bSuccess = (R_SUCCESS == readUnsignedNumber(string, nPos, nTemp));
+
+    if (!bTimePart && bSuccess)
+    {
+        bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart,
+                     nYears, 'y', 'Y');
+    }
+
+    if (!bTimePart && bSuccess)
+    {
+        bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart,
+                     nMonths, 'm', 'M');
+    }
+
+    if (!bTimePart && bSuccess)
+    {
+        bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart,
+                     nDays, 'd', 'D');
+    }
+
+    if (bTimePart)
+    {
+        if (-1 == nTemp) // a 'T' must be followed by a component
+        {
+            bSuccess = false;
+        }
+
+        if (bSuccess)
+        {
+            bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart,
+                         nHours, 'h', 'H');
+        }
+
+        if (bSuccess)
+        {
+            bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart,
+                         nMinutes, 'm', 'M');
+        }
+
+        // eeek! seconds are icky.
+        if ((nPos < string.size()) && bSuccess)
+        {
+            if (string[nPos] == '.' ||
+                string[nPos] == ',')
+            {
+                ++nPos;
+                if (-1 != nTemp)
+                {
+                    nSeconds = nTemp;
+                    nTemp = -1;
+                    const sal_Int32 nStart(nPos);
+                    bSuccess = readUnsignedNumberMaxDigits(9, string, nPos, nTemp) == R_SUCCESS;
+                    if ((nPos < string.size()) && bSuccess)
+                    {
+                        if (-1 != nTemp)
+                        {
+                            nNanoSeconds = nTemp;
+                            // scale e.g. ".5" (one digit) up to 500000000 ns
+                            sal_Int32 nDigits = nPos - nStart;
+                            assert(nDigits >= 0);
+                            for (; nDigits < 9; ++nDigits)
+                            {
+                                nNanoSeconds *= 10;
+                            }
+                            nTemp=-1;
+                            if ('S' == string[nPos] || 's' == string[nPos])
+                            {
+                                ++nPos;
+                            }
+                            else
+                            {
+                                bSuccess = false;
+                            }
+                        }
+                        else
+                        {
+                            bSuccess = false;
+                        }
+                    }
+                }
+                else
+                {
+                    bSuccess = false;
+                }
+            }
+            else if ('S' == string[nPos] || 's' == string[nPos])
+            {
+                ++nPos;
+                if (-1 != nTemp)
+                {
+                    nSeconds = nTemp;
+                    nTemp = -1;
+                }
+                else
+                {
+                    bSuccess = false;
+                }
+            }
+        }
+    }
+
+    if (nPos != string.size()) // string not processed completely?
+    {
+        bSuccess = false;
+    }
+
+    if (nTemp != -1) // unprocessed number?
+    {
+        bSuccess = false;
+    }
+
+    if (bSuccess)
+    {
+        rDuration.Negative = bIsNegativeDuration;
+        rDuration.Years = static_cast<sal_Int16>(nYears);
+        rDuration.Months = static_cast<sal_Int16>(nMonths);
+        rDuration.Days = static_cast<sal_Int16>(nDays);
+        rDuration.Hours = static_cast<sal_Int16>(nHours);
+        rDuration.Minutes = static_cast<sal_Int16>(nMinutes);
+        rDuration.Seconds = static_cast<sal_Int16>(nSeconds);
+        rDuration.NanoSeconds = nNanoSeconds;
+    }
+
+    return bSuccess;
+}
+
+/** convert ISO8601 "duration" string to util::Duration */
+bool Converter::convertDuration(util::Duration& rDuration,
+                                std::u16string_view rString)
+{
+    return lcl_convertDuration<std::u16string_view>(rDuration, rString);
+}
+
+/** convert ISO8601 "duration" string to util::Duration */
+bool Converter::convertDuration(util::Duration& rDuration,
+                                std::string_view rString)
+{
+    return lcl_convertDuration<std::string_view>(rDuration, rString);
+}
+
+/** Append a time zone designator for nOffset (in minutes) to i_rBuffer.
+ *
+ * An offset of 0 is written as "Z", anything else as sign, two-digit hours,
+ * ':' and two-digit minutes.  Offsets beyond 14:00 only trigger a warning.
+ */
+static void
+lcl_AppendTimezone(OUStringBuffer & i_rBuffer, int const nOffset)
+{
+    if (0 == nOffset)
+    {
+        i_rBuffer.append('Z');
+        return;
+    }
+
+    i_rBuffer.append((0 < nOffset) ? '+' : '-');
+
+    const sal_Int32 nHours  (abs(nOffset) / 60);
+    const sal_Int32 nMinutes(abs(nOffset) % 60);
+    SAL_WARN_IF(nHours > 14 || (nHours == 14 && nMinutes > 0),
+            "sax", "convertDateTime: timezone overflow");
+
+    // both parts are padded to two digits
+    if (nHours < 10)
+    {
+        i_rBuffer.append('0');
+    }
+    i_rBuffer.append(nHours);
+    i_rBuffer.append(':');
+    if (nMinutes < 10)
+    {
+        i_rBuffer.append('0');
+    }
+    i_rBuffer.append(nMinutes);
+}
+
+/** Write a util::Date as an ISO "date" string.
+ *
+ * The date is copied into a util::DateTime whose time fields are all zero and
+ * whose IsUTC flag is false, and convertDateTime() does the formatting.
+ */
+void Converter::convertDate(
+        OUStringBuffer& i_rBuffer,
+        const util::Date& i_rDate,
+        sal_Int16 const*const pTimeZoneOffset)
+{
+    const util::DateTime aMidnight(
+        0, 0, 0, 0, // no time of day
+        i_rDate.Day, i_rDate.Month, i_rDate.Year,
+        false);
+    convertDateTime(i_rBuffer, aMidnight, pTimeZoneOffset);
+}
+
+/** Append the time of i_rDateTime as hh:mm:ss[.nnnnnnnnn] to i_rBuffer.
+ *
+ * Hours, minutes and seconds are padded to two digits; nanoseconds are only
+ * written when they are not zero, as nine zero-padded digits.
+ */
+static void convertTime(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime)
+{
+    auto const appendTwoDigits = [&i_rBuffer](sal_Int32 const nField)
+    {
+        if (nField < 10) {
+            i_rBuffer.append('0');
+        }
+        i_rBuffer.append(nField);
+    };
+
+    appendTwoDigits(i_rDateTime.Hours);
+    i_rBuffer.append(':');
+    appendTwoDigits(i_rDateTime.Minutes);
+    i_rBuffer.append(':');
+    appendTwoDigits(i_rDateTime.Seconds);
+
+    if (i_rDateTime.NanoSeconds > 0) {
+        OSL_ENSURE(i_rDateTime.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999");
+        i_rBuffer.append('.');
+        std::ostringstream aDigits;
+        aDigits.fill('0');
+        aDigits.width(9);
+        aDigits << i_rDateTime.NanoSeconds;
+        i_rBuffer.append(OUString::createFromAscii(aDigits.str().c_str()));
+    }
+}
+
+/** Append the time zone designator that applies to i_rDateTime, if any.
+ *
+ * An explicit offset in pTimeZoneOffset wins; without one, a value flagged
+ * IsUTC gets offset 0; otherwise nothing is written.
+ */
+static void convertTimeZone(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime,
+        sal_Int16 const* pTimeZoneOffset)
+{
+    if (!pTimeZoneOffset && !i_rDateTime.IsUTC)
+    {
+        return; // no zone information at all
+    }
+    int const nOffset = pTimeZoneOffset ? *pTimeZoneOffset : 0;
+    lcl_AppendTimezone(i_rBuffer, nOffset);
+}
+
+/** Write a util::DateTime as ISO "dateTime", or as ISO "time" if it has no usable date.
+ *
+ * The date counts as usable when the year is not 0, the month is 1..12 and
+ * the day is 1..31.  With a usable date the time is always written, even at
+ * midnight.
+ */
+void Converter::convertTimeOrDateTime(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime)
+{
+    bool const bHasDate = i_rDateTime.Year != 0
+        && i_rDateTime.Month >= 1 && i_rDateTime.Month <= 12
+        && i_rDateTime.Day >= 1 && i_rDateTime.Day <= 31;
+
+    if (bHasDate)
+    {
+        convertDateTime(i_rBuffer, i_rDateTime, nullptr, true);
+        return;
+    }
+
+    convertTime(i_rBuffer, i_rDateTime);
+    convertTimeZone(i_rBuffer, i_rDateTime, nullptr);
+}
+
+/** Write a util::DateTime as an ISO "date" or "dateTime" string.
+ *
+ * The year is padded to at least four digits and prefixed with '-' when
+ * negative; month and day are padded to two digits.  The time follows after
+ * a 'T' when hours, minutes or seconds are non-zero or when i_bAddTimeIf0AM
+ * is set.  The time zone designator, if any, comes last.
+ */
+void Converter::convertDateTime(
+        OUStringBuffer& i_rBuffer,
+        const css::util::DateTime& i_rDateTime,
+        sal_Int16 const*const pTimeZoneOffset,
+        bool i_bAddTimeIf0AM )
+{
+    const sal_Unicode dash('-');
+    const sal_Unicode zero('0');
+
+    sal_Int32 const nAbsYear(abs(i_rDateTime.Year));
+    if (i_rDateTime.Year < 0) {
+        i_rBuffer.append(dash); // negative
+    }
+    // one leading zero for every digit the year is short of four
+    for (sal_Int32 nLimit = 1000; nLimit >= 10; nLimit /= 10) {
+        if (nAbsYear < nLimit) {
+            i_rBuffer.append(zero);
+        }
+    }
+    i_rBuffer.append( OUString::number(nAbsYear) );
+    i_rBuffer.append(dash);
+
+    if( i_rDateTime.Month < 10 ) {
+        i_rBuffer.append(zero);
+    }
+    i_rBuffer.append( OUString::number(i_rDateTime.Month) );
+    i_rBuffer.append(dash);
+
+    if( i_rDateTime.Day < 10 ) {
+        i_rBuffer.append(zero);
+    }
+    i_rBuffer.append( static_cast<sal_Int32>(i_rDateTime.Day) );
+
+    bool const bNonZeroTime = i_rDateTime.Seconds != 0
+        || i_rDateTime.Minutes != 0
+        || i_rDateTime.Hours != 0;
+    if( bNonZeroTime || i_bAddTimeIf0AM )
+    {
+        i_rBuffer.append('T');
+        convertTime(i_rBuffer, i_rDateTime);
+    }
+
+    convertTimeZone(i_rBuffer, i_rDateTime, pTimeZoneOffset);
+}
+
+/** Parse an ISO "date" or "dateTime" given as UTF-16 text into a util::DateTime.
+ *
+ * Forwards to parseDateOrDateTime() without a util::Date target and without
+ * a time zone output; whether a time was present is not reported.
+ */
+bool Converter::parseDateTime( util::DateTime& rDateTime,
+                               std::u16string_view rString )
+{
+    bool bHadTimeIgnored;
+    bool const bParsed = parseDateOrDateTime(nullptr, rDateTime,
+            bHadTimeIgnored, nullptr, rString);
+    return bParsed;
+}
+
+/** Parse an ISO "date" or "dateTime" given as 8-bit text into a util::DateTime.
+ *
+ * Forwards to parseDateOrDateTime() without a util::Date target and without
+ * a time zone output; whether a time was present is not reported.
+ */
+bool Converter::parseDateTime( util::DateTime& rDateTime,
+                               std::string_view rString )
+{
+    bool bHadTimeIgnored;
+    bool const bParsed = parseDateOrDateTime(nullptr, rDateTime,
+            bHadTimeIgnored, nullptr, rString);
+    return bParsed;
+}
+
+/** Gregorian leap year rule: divisible by 4, except centuries not divisible by 400. */
+static bool lcl_isLeapYear(const sal_uInt32 nYear)
+{
+    if ((nYear % 4) != 0)
+    {
+        return false;
+    }
+    if ((nYear % 100) != 0)
+    {
+        return true;
+    }
+    return (nYear % 400) == 0;
+}
+
+/** Number of days in month nMonth (1..12) of year nYear, leap years included. */
+static sal_uInt16
+lcl_MaxDaysPerMonth(const sal_Int32 nMonth, const sal_Int32 nYear)
+{
+    assert(0 < nMonth && nMonth <= 12);
+    static const sal_uInt16 s_MaxDaysPerMonth[12] =
+        { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+
+    bool const bLeapFebruary = (2 == nMonth) && lcl_isLeapYear(nYear);
+    return bLeapFebruary ? 29 : s_MaxDaysPerMonth[nMonth - 1];
+}
+
+/** Shift a local date/time, in place, to UTC.
+ *
+ * nSourceOffset is the zone offset of the incoming value in minutes.  UTC is
+ * local time minus that offset, so a negative offset moves the clock forward
+ * and a positive one moves it back; the change is carried into hours, day,
+ * month and year as needed.  A day of 0 stands for a time without date, in
+ * which case only hours and minutes are adjusted.
+ */
+static void lcl_ConvertToUTC(
+        sal_Int16 & o_rYear, sal_uInt16 & o_rMonth, sal_uInt16 & o_rDay,
+        sal_uInt16 & o_rHours, sal_uInt16 & o_rMinutes,
+        int const nSourceOffset)
+{
+    sal_Int16 nOffsetHours(abs(nSourceOffset) / 60);
+    sal_Int16 const nOffsetMinutes(abs(nSourceOffset) % 60);
+    // The minute part is applied exactly once, in the branch for the sign of
+    // the offset.  (Adding it here as well would count it twice for negative
+    // offsets and cancel it out for positive ones.)
+    if (nSourceOffset < 0)
+    {
+        // behind UTC: add the offset
+        o_rMinutes += nOffsetMinutes;
+        if (60 <= o_rMinutes)
+        {
+            o_rMinutes -= 60;
+            ++nOffsetHours;
+        }
+        o_rHours += nOffsetHours;
+        if (o_rHours < 24)
+        {
+            return;
+        }
+        sal_Int16 nDayAdd(0);
+        while (24 <= o_rHours)
+        {
+            o_rHours -= 24;
+            ++nDayAdd;
+        }
+        if (o_rDay == 0)
+        {
+            return; // handle time without date - don't adjust what isn't there
+        }
+        o_rDay += nDayAdd;
+        sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(o_rMonth, o_rYear));
+        if (o_rDay <= nDaysInMonth)
+        {
+            return;
+        }
+        o_rDay -= nDaysInMonth;
+        ++o_rMonth;
+        if (o_rMonth <= 12)
+        {
+            return;
+        }
+        o_rMonth = 1;
+        ++o_rYear; // works for negative year too
+    }
+    else if (0 < nSourceOffset)
+    {
+        // ahead of UTC: subtract the offset
+        // argh everything is unsigned
+        if (o_rMinutes < nOffsetMinutes)
+        {
+            // borrow an hour so the subtraction below cannot wrap
+            o_rMinutes += 60;
+            ++nOffsetHours;
+        }
+        o_rMinutes -= nOffsetMinutes;
+        sal_Int16 nDaySubtract(0);
+        while (o_rHours < nOffsetHours)
+        {
+            o_rHours += 24;
+            ++nDaySubtract;
+        }
+        o_rHours -= nOffsetHours;
+        if (o_rDay == 0)
+        {
+            return; // handle time without date - don't adjust what isn't there
+        }
+        if (nDaySubtract < o_rDay)
+        {
+            o_rDay -= nDaySubtract;
+            return;
+        }
+        // crossed into the previous month: borrow its days first
+        sal_Int16 const nPrevMonth((o_rMonth == 1) ? 12 : o_rMonth - 1);
+        sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(nPrevMonth, o_rYear));
+        o_rDay += nDaysInMonth;
+        --o_rMonth;
+        if (0 == o_rMonth)
+        {
+            o_rMonth = 12;
+            --o_rYear; // works for negative year too
+        }
+        o_rDay -= nDaySubtract;
+    }
+}
+
+/** Read one numeric date/time field and check how many digits it had.
+ *
+ * Fails when no number could be read successfully, when it has fewer than
+ * nMinLength digits, or - with bExactLength - more than nMinLength digits.
+ * o_rnTarget is only written on success.
+ */
+template <typename V>
+static bool
+readDateTimeComponent(V rString,
+    size_t & io_rnPos, sal_Int32 & o_rnTarget,
+    const sal_Int32 nMinLength, const bool bExactLength)
+{
+    size_t const nStart(io_rnPos);
+    sal_Int32 nValue(0);
+    bool const bRead =
+        (R_SUCCESS == readUnsignedNumber<V>(rString, io_rnPos, nValue));
+    if (!bRead)
+    {
+        return false;
+    }
+
+    const sal_Int32 nDigits(io_rnPos - nStart);
+    bool const bTooShort = nDigits < nMinLength;
+    bool const bTooLong = bExactLength && (nDigits > nMinLength);
+    if (bTooShort || bTooLong)
+    {
+        return false; // bad length
+    }
+
+    o_rnTarget = nValue;
+    return true;
+}
+
+/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date
+ *
+ * Parses the date part [-]Y-MM-DD of `string`, starting at nPos and moving
+ * nPos behind what was read.  A leading '-' sets isNegative.  If a 'T' or 't'
+ * follows the day, it is consumed and bHaveTime is set.  With
+ * bIgnoreInvalidOrMissingDate, a year, month or day of 0 is tolerated.
+ *
+ * @return false on a malformed date; the output arguments may then have been
+ *         partly written
+ */
+template<typename V>
+static bool lcl_parseDate(
+ bool & isNegative,
+ sal_Int32 & nYear, sal_Int32 & nMonth, sal_Int32 & nDay,
+ bool & bHaveTime,
+ size_t & nPos,
+ V string,
+ bool const bIgnoreInvalidOrMissingDate)
+{
+ bool bSuccess = true;
+
+ if (string.size() > nPos)
+ {
+ if ('-' == string[nPos]) // sign of the year
+ {
+ isNegative = true;
+ ++nPos;
+ }
+ }
+
+ {
+ // While W3C XMLSchema specifies years with a minimum of 4 digits, be
+ // lenient in what we accept for years < 1000. One digit is acceptable
+ // if the remainders match.
+ bSuccess = readDateTimeComponent<V>(string, nPos, nYear, 1, false);
+ if (!bIgnoreInvalidOrMissingDate)
+ {
+ bSuccess &= (0 < nYear);
+ }
+ bSuccess &= (nPos < string.size()); // not last token
+ }
+ if (bSuccess && ('-' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent<V>(string, nPos, nMonth, 2, true); // exactly two digits
+ if (!bIgnoreInvalidOrMissingDate)
+ {
+ bSuccess &= (0 < nMonth);
+ }
+ bSuccess &= (nMonth <= 12);
+ bSuccess &= (nPos < string.size()); // not last token
+ }
+ if (bSuccess && ('-' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent(string, nPos, nDay, 2, true); // exactly two digits
+ if (!bIgnoreInvalidOrMissingDate)
+ {
+ bSuccess &= (0 < nDay);
+ }
+ if (nMonth > 0) // not possible to check if month was missing
+ {
+ bSuccess &= (nDay <= lcl_MaxDaysPerMonth(nMonth, nYear));
+ }
+ else assert(bIgnoreInvalidOrMissingDate);
+ }
+
+ if (bSuccess && (nPos < string.size()))
+ {
+ if ('T' == string[nPos] || 't' == string[nPos]) // time separator
+ {
+ bHaveTime = true;
+ ++nPos;
+ }
+ }
+
+ return bSuccess;
+}
+
+/** convert ISO "date", "dateTime" or (only if bIgnoreInvalidOrMissingDate) time-only string to util::DateTime or util::Date; on failure returns false and writes none of the outputs */
+template <typename V>
+static bool lcl_parseDateTime(
+ util::Date *const pDate, util::DateTime & rDateTime, // pDate (may be null) receives the result only when the string has no time part
+ bool & rbDateTime, // out: true if a time part was parsed (result is in rDateTime)
+ std::optional<sal_Int16> *const pTimeZoneOffset, // optional out: offset in minutes, or reset when the string has no timezone
+ V string,
+ bool const bIgnoreInvalidOrMissingDate)
+{
+ bool bSuccess = true;
+
+ string = trim(string);
+
+ bool isNegative(false);
+ sal_Int32 nYear(0);
+ sal_Int32 nMonth(0);
+ sal_Int32 nDay(0);
+ size_t nPos(0);
+ bool bHaveTime(false);
+
+ if ( !bIgnoreInvalidOrMissingDate // strict mode always expects a date part
+ || string.find(':') == V::npos // no time?
+ || (string.find('-') != V::npos // a '-' before the first ':' is taken as a date separator
+ && string.find('-') < string.find(':')))
+ {
+ bSuccess &= lcl_parseDate<V>(isNegative, nYear, nMonth, nDay,
+ bHaveTime, nPos, string, bIgnoreInvalidOrMissingDate);
+ }
+ else
+ {
+ bHaveTime = true; // lenient mode: treat the whole string as a time; year/month/day stay 0
+ }
+
+ sal_Int32 nHours(0);
+ sal_Int32 nMinutes(0);
+ sal_Int32 nSeconds(0);
+ sal_Int32 nNanoSeconds(0);
+ if (bSuccess && bHaveTime)
+ {
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nHours, 2, true);
+ bSuccess &= (0 <= nHours) && (nHours <= 24);
+ bSuccess &= (nPos < string.size()); // not last token
+ }
+ if (bSuccess && (':' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent(string, nPos, nMinutes, 2, true);
+ bSuccess &= (0 <= nMinutes) && (nMinutes < 60);
+ bSuccess &= (nPos < string.size()); // not last token
+ }
+ if (bSuccess && (':' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent(string, nPos, nSeconds, 2, true);
+ bSuccess &= (0 <= nSeconds) && (nSeconds < 60);
+ }
+ if (bSuccess && (nPos < string.size()) &&
+ ('.' == string[nPos] || ',' == string[nPos])) // fraction separator
+ {
+ ++nPos;
+ const sal_Int32 nStart(nPos);
+ sal_Int32 nTemp(0);
+ if (R_NOTHING == readUnsignedNumberMaxDigits<V>(9, string, nPos, nTemp)) // a separator without digits is an error
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ sal_Int32 nDigits = std::min<sal_Int32>(nPos - nStart, 9);
+ assert(nDigits > 0);
+ for (; nDigits < 9; ++nDigits) // scale a shorter fraction up to nanoseconds (9 decimal places)
+ {
+ nTemp *= 10;
+ }
+ nNanoSeconds = nTemp;
+ }
+ }
+
+ if (bSuccess && (nHours == 24))
+ {
+ if (!((0 == nMinutes) && (0 == nSeconds) && (0 == nNanoSeconds)))
+ {
+ bSuccess = false; // only 24:00:00 is valid
+ }
+ }
+ }
+
+ bool bHaveTimezone(false);
+ bool bHaveTimezonePlus(false);
+ bool bHaveTimezoneMinus(false);
+ if (bSuccess && (nPos < string.size())) // anything left must be a timezone designator
+ {
+ const sal_Unicode c(string[nPos]);
+ if ('+' == c)
+ {
+ bHaveTimezone = true;
+ bHaveTimezonePlus = true;
+ ++nPos;
+ }
+ else if ('-' == c)
+ {
+ bHaveTimezone = true;
+ bHaveTimezoneMinus = true;
+ ++nPos;
+ }
+ else if ('Z' == c || 'z' == c) // UTC designator: offset stays 0
+ {
+ bHaveTimezone = true;
+ ++nPos;
+ }
+ else
+ {
+ bSuccess = false;
+ }
+ }
+ sal_Int32 nTimezoneHours(0);
+ sal_Int32 nTimezoneMinutes(0);
+ if (bSuccess && (bHaveTimezonePlus || bHaveTimezoneMinus)) // numeric offset "hh:mm"
+ {
+ bSuccess = readDateTimeComponent<V>(
+ string, nPos, nTimezoneHours, 2, true);
+ bSuccess &= (0 <= nTimezoneHours) && (nTimezoneHours <= 14);
+ bSuccess &= (nPos < string.size()); // not last token
+ if (bSuccess && (':' != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+
+ bSuccess = readDateTimeComponent<V>(
+ string, nPos, nTimezoneMinutes, 2, true);
+ bSuccess &= (0 <= nTimezoneMinutes) && (nTimezoneMinutes < 60);
+ }
+ if (bSuccess && (nTimezoneHours == 14))
+ {
+ if (0 != nTimezoneMinutes)
+ {
+ bSuccess = false; // only +-14:00 is valid
+ }
+ }
+ }
+
+ bSuccess &= (nPos == string.size()); // trailing junk?
+
+ if (bSuccess)
+ {
+ sal_Int16 const nTimezoneOffset = (bHaveTimezoneMinus ? -1 : +1) // signed offset in minutes
+ * ((nTimezoneHours * 60) + nTimezoneMinutes);
+ if (!pDate || bHaveTime) // time is optional
+ {
+ rDateTime.Year = // NOTE(review): nYear is narrowed to sal_Int16 without a range check here
+ (isNegative ? -1 : +1) * static_cast<sal_Int16>(nYear);
+ rDateTime.Month = static_cast<sal_uInt16>(nMonth);
+ rDateTime.Day = static_cast<sal_uInt16>(nDay);
+ rDateTime.Hours = static_cast<sal_uInt16>(nHours);
+ rDateTime.Minutes = static_cast<sal_uInt16>(nMinutes);
+ rDateTime.Seconds = static_cast<sal_uInt16>(nSeconds);
+ rDateTime.NanoSeconds = static_cast<sal_uInt32>(nNanoSeconds);
+ if (bHaveTimezone)
+ {
+ if (pTimeZoneOffset) // caller wants the offset: report it and keep the parsed fields as written
+ {
+ *pTimeZoneOffset = nTimezoneOffset;
+ rDateTime.IsUTC = (0 == nTimezoneOffset);
+ }
+ else // no out-param: hand the fields and offset to lcl_ConvertToUTC and mark the result as UTC
+ {
+ lcl_ConvertToUTC(rDateTime.Year, rDateTime.Month,
+ rDateTime.Day, rDateTime.Hours, rDateTime.Minutes,
+ nTimezoneOffset);
+ rDateTime.IsUTC = true;
+ }
+ }
+ else
+ {
+ if (pTimeZoneOffset)
+ {
+ pTimeZoneOffset->reset(); // signal "no timezone in the string"
+ }
+ rDateTime.IsUTC = false;
+ }
+ rbDateTime = bHaveTime;
+ }
+ else // date-only string and the caller supplied a Date target
+ {
+ pDate->Year =
+ (isNegative ? -1 : +1) * static_cast<sal_Int16>(nYear);
+ pDate->Month = static_cast<sal_uInt16>(nMonth);
+ pDate->Day = static_cast<sal_uInt16>(nDay);
+ if (bHaveTimezone)
+ {
+ if (pTimeZoneOffset)
+ {
+ *pTimeZoneOffset = nTimezoneOffset;
+ }
+ else
+ {
+ // a Date cannot be adjusted
+ SAL_INFO("sax", "dropping timezone");
+ }
+ }
+ else
+ {
+ if (pTimeZoneOffset)
+ {
+ pTimeZoneOffset->reset();
+ }
+ }
+ rbDateTime = false;
+ }
+ }
+ return bSuccess;
+}
+
+/** convert ISO "time" or "dateTime" string (UTF-16 view) to util::DateTime; returns false if parsing fails */
+bool Converter::parseTimeOrDateTime(
+ util::DateTime & rDateTime,
+ std::u16string_view rString)
+{
+ bool dummy; // receives the "has time part" flag, which this API does not expose
+ return lcl_parseDateTime(
+ nullptr, rDateTime, dummy, nullptr, rString, true); // no Date target, no timezone out-param, lenient about the date part
+}
+
+/** convert ISO "time" or "dateTime" string (8-bit view) to util::DateTime; returns false if parsing fails */
+bool Converter::parseTimeOrDateTime(
+ util::DateTime & rDateTime,
+ std::string_view rString)
+{
+ bool dummy; // receives the "has time part" flag, which this API does not expose
+ return lcl_parseDateTime(
+ nullptr, rDateTime, dummy, nullptr, rString, true); // no Date target, no timezone out-param, lenient about the date part
+}
+
+/** convert ISO "date" or "dateTime" string (UTF-16 view) to util::DateTime or util::Date; returns false if parsing fails */
+bool Converter::parseDateOrDateTime(
+ util::Date *const pDate, util::DateTime & rDateTime, // pDate may be null; then the result always goes to rDateTime
+ bool & rbDateTime, // out: true if the string contained a time part
+ std::optional<sal_Int16> *const pTimeZoneOffset, // optional out: timezone offset in minutes
+ std::u16string_view rString )
+{
+ return lcl_parseDateTime(
+ pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false); // false: strict validation of the date part
+}
+
+/** convert ISO "date" or "dateTime" string (8-bit view) to util::DateTime or util::Date; returns false if parsing fails */
+bool Converter::parseDateOrDateTime(
+ util::Date *const pDate, util::DateTime & rDateTime, // pDate may be null; then the result always goes to rDateTime
+ bool & rbDateTime, // out: true if the string contained a time part
+ std::optional<sal_Int16> *const pTimeZoneOffset, // optional out: timezone offset in minutes
+ std::string_view rString )
+{
+ return lcl_parseDateTime(
+ pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false); // false: strict validation of the date part
+}
+
+/** returns the index of the first comma at or after nPos in rStr, or -1 if
+ there is none. Commas inside '...' or "..." quoted runs are not matched */
+sal_Int32 Converter::indexOfComma( std::u16string_view rStr,
+ sal_Int32 nPos )
+{
+ sal_Unicode cQuote = 0; // the quote character currently open, or 0 when outside quotes
+ sal_Int32 nLen = rStr.size();
+ for( ; nPos < nLen; nPos++ )
+ {
+ sal_Unicode c = rStr[nPos];
+ switch( c )
+ {
+ case u'\'':
+ if( 0 == cQuote )
+ cQuote = c;
+ else if( '\'' == cQuote ) // only the matching quote type closes the run
+ cQuote = 0;
+ break;
+
+ case u'"':
+ if( 0 == cQuote )
+ cQuote = c;
+ else if( '\"' == cQuote )
+ cQuote = 0;
+ break;
+
+ case u',':
+ if( 0 == cQuote )
+ return nPos;
+ break;
+ }
+ }
+
+ return -1;
+}
+
+double Converter::GetConversionFactor(OUStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) // returns the source->target unit factor; rUnit gets the target unit's string
+{
+ double fRetval(1.0); // identical units: factor 1.0
+ rUnit.setLength(0); // rUnit is always cleared, and stays empty when the units are equal
+
+
+ if(nSourceUnit != nTargetUnit)
+ {
+ const o3tl::Length eFrom = Measure2O3tlUnit(nSourceUnit);
+ const o3tl::Length eTo = Measure2O3tlUnit(nTargetUnit);
+ fRetval = o3tl::convert(1.0, eFrom, eTo); // value of one source unit expressed in the target unit
+
+ if (const auto sUnit = Measure2UnitString(nTargetUnit); sUnit.size() > 0)
+ rUnit.appendAscii(sUnit.data(), sUnit.size());
+ }
+
+ return fRetval;
+}
+
+double Converter::GetConversionFactor(OStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) // 8-bit buffer variant: returns the source->target unit factor; rUnit gets the target unit's string
+{
+ double fRetval(1.0); // identical units: factor 1.0
+ rUnit.setLength(0); // rUnit is always cleared, and stays empty when the units are equal
+
+
+ if(nSourceUnit != nTargetUnit)
+ {
+ const o3tl::Length eFrom = Measure2O3tlUnit(nSourceUnit);
+ const o3tl::Length eTo = Measure2O3tlUnit(nTargetUnit);
+ fRetval = o3tl::convert(1.0, eFrom, eTo); // value of one source unit expressed in the target unit
+
+ if (const auto sUnit = Measure2UnitString(nTargetUnit); sUnit.size() > 0)
+ rUnit.append(sUnit.data(), sUnit.size());
+ }
+
+ return fRetval;
+}
+
+template<typename V>
+static sal_Int16 lcl_GetUnitFromString(V rString, sal_Int16 nDefaultUnit) // skips a leading number and returns the MeasureUnit named by the suffix, or nDefaultUnit if none is recognised
+{
+ sal_Int32 nPos = 0;
+ sal_Int32 nLen = rString.size();
+ sal_Int16 nRetUnit = nDefaultUnit;
+
+ // skip white space
+ while( nPos < nLen && ' ' == rString[nPos] )
+ nPos++;
+
+ // skip negative
+ if( nPos < nLen && '-' == rString[nPos] )
+ nPos++;
+
+ // skip number
+ while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] )
+ nPos++;
+
+ if( nPos < nLen && '.' == rString[nPos] ) // optional fractional part
+ {
+ nPos++;
+ while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] )
+ nPos++;
+ }
+
+ // skip white space
+ while( nPos < nLen && ' ' == rString[nPos] )
+ nPos++;
+
+ if( nPos < nLen ) // match the unit suffix case-insensitively by its first two characters
+ {
+ switch(rString[nPos])
+ {
+ case '%' :
+ {
+ nRetUnit = MeasureUnit::PERCENT;
+ break;
+ }
+ case 'c':
+ case 'C':
+ {
+ if(nPos+1 < nLen && (rString[nPos+1] == 'm'
+ || rString[nPos+1] == 'M'))
+ nRetUnit = MeasureUnit::CM;
+ break;
+ }
+ case 'e':
+ case 'E':
+ {
+ // CSS1_EMS or CSS1_EMX later
+ break; // for now the default unit is kept
+ }
+ case 'i':
+ case 'I':
+ {
+ if(nPos+1 < nLen && (rString[nPos+1] == 'n'
+ || rString[nPos+1] == 'N'))
+ nRetUnit = MeasureUnit::INCH;
+ break;
+ }
+ case 'm':
+ case 'M':
+ {
+ if(nPos+1 < nLen && (rString[nPos+1] == 'm'
+ || rString[nPos+1] == 'M'))
+ nRetUnit = MeasureUnit::MM;
+ break;
+ }
+ case 'p':
+ case 'P':
+ {
+ if(nPos+1 < nLen && (rString[nPos+1] == 't'
+ || rString[nPos+1] == 'T'))
+ nRetUnit = MeasureUnit::POINT;
+ if(nPos+1 < nLen && (rString[nPos+1] == 'c' // "pc" is reported as MeasureUnit::TWIP here
+ || rString[nPos+1] == 'C'))
+ nRetUnit = MeasureUnit::TWIP;
+ break;
+ }
+ }
+ }
+
+ return nRetUnit;
+}
+
+sal_Int16 Converter::GetUnitFromString(std::u16string_view rString, sal_Int16 nDefaultUnit) // UTF-16 overload: returns the unit named in rString, or nDefaultUnit
+{
+ return lcl_GetUnitFromString(rString, nDefaultUnit);
+}
+sal_Int16 Converter::GetUnitFromString(std::string_view rString, sal_Int16 nDefaultUnit) // 8-bit overload: returns the unit named in rString, or nDefaultUnit
+{
+ return lcl_GetUnitFromString(rString, nDefaultUnit);
+}
+
+bool Converter::convertAny(OUStringBuffer& rsValue, // out: textual form of rValue (cleared first)
+ OUStringBuffer& rsType , // out: type name such as "integer" or "string" (cleared first)
+ const css::uno::Any& rValue) // returns true if rValue held a supported type and was converted
+{
+ bool bConverted = false;
+
+ rsValue.setLength(0);
+ rsType.setLength (0);
+
+ switch (rValue.getValueTypeClass())
+ {
+ case css::uno::TypeClass_BYTE :
+ case css::uno::TypeClass_SHORT :
+ case css::uno::TypeClass_UNSIGNED_SHORT :
+ case css::uno::TypeClass_LONG :
+ case css::uno::TypeClass_UNSIGNED_LONG :
+ {
+ sal_Int32 nTempValue = 0; // all integral classes listed above are extracted as sal_Int32
+ if (rValue >>= nTempValue)
+ {
+ rsType.append("integer");
+ bConverted = true;
+ rsValue.append(nTempValue);
+ }
+ }
+ break;
+
+ case css::uno::TypeClass_BOOLEAN :
+ {
+ bool bTempValue = false;
+ if (rValue >>= bTempValue)
+ {
+ rsType.append("boolean");
+ bConverted = true;
+ ::sax::Converter::convertBool(rsValue, bTempValue);
+ }
+ }
+ break;
+
+ case css::uno::TypeClass_FLOAT :
+ case css::uno::TypeClass_DOUBLE :
+ {
+ double fTempValue = 0.0;
+ if (rValue >>= fTempValue)
+ {
+ rsType.append("float");
+ bConverted = true;
+ ::sax::Converter::convertDouble(rsValue, fTempValue);
+ }
+ }
+ break;
+
+ case css::uno::TypeClass_STRING :
+ {
+ OUString sTempValue;
+ if (rValue >>= sTempValue)
+ {
+ rsType.append("string");
+ bConverted = true;
+ rsValue.append(sTempValue);
+ }
+ }
+ break;
+
+ case css::uno::TypeClass_STRUCT : // only the util::Date, util::Time and util::DateTime structs are handled
+ {
+ css::util::Date aDate ;
+ css::util::Time aTime ;
+ css::util::DateTime aDateTime;
+
+ if (rValue >>= aDate)
+ {
+ rsType.append("date");
+ bConverted = true;
+ css::util::DateTime aTempValue; // widen the Date to a DateTime with a zero time part for formatting
+ aTempValue.Day = aDate.Day;
+ aTempValue.Month = aDate.Month;
+ aTempValue.Year = aDate.Year;
+ aTempValue.NanoSeconds = 0;
+ aTempValue.Seconds = 0;
+ aTempValue.Minutes = 0;
+ aTempValue.Hours = 0;
+ ::sax::Converter::convertDateTime(rsValue, aTempValue, nullptr);
+ }
+ else
+ if (rValue >>= aTime)
+ {
+ rsType.append("time");
+ bConverted = true;
+ css::util::Duration aTempValue; // a Time is written through convertDuration with zero days/months/years
+ aTempValue.Days = 0;
+ aTempValue.Months = 0;
+ aTempValue.Years = 0;
+ aTempValue.NanoSeconds = aTime.NanoSeconds;
+ aTempValue.Seconds = aTime.Seconds;
+ aTempValue.Minutes = aTime.Minutes;
+ aTempValue.Hours = aTime.Hours;
+ ::sax::Converter::convertDuration(rsValue, aTempValue);
+ }
+ else
+ if (rValue >>= aDateTime)
+ {
+ rsType.append("date"); // NOTE(review): a DateTime is also reported as type "date" - confirm this is intended
+ bConverted = true;
+ ::sax::Converter::convertDateTime(rsValue, aDateTime, nullptr);
+ }
+ }
+ break;
+ default: // any other type class: nothing written, returns false
+ break;
+ }
+
+ return bConverted;
+}
+
+void Converter::convertBytesToHexBinary(OUStringBuffer& rBuffer, const void* pBytes, // replaces rBuffer's content with two hex digits per input byte
+ sal_Int32 nBytes)
+{
+ rBuffer.setLength(0);
+ rBuffer.ensureCapacity(nBytes * 2);
+ auto pChars = static_cast<const unsigned char*>(pBytes); // unsigned so each byte reads as 0..255
+ for (sal_Int32 i = 0; i < nBytes; ++i)
+ {
+ sal_Int32 c = *pChars++;
+ if (c < 16)
+ rBuffer.append('0'); // pad single-digit values to two characters
+ rBuffer.append(c, 16); // append in radix 16
+ }
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx
new file mode 100644
index 000000000..9b309d5fb
--- /dev/null
+++ b/sax/source/tools/fastattribs.cxx
@@ -0,0 +1,336 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <algorithm>
+
+#include <com/sun/star/xml/sax/SAXException.hpp>
+#include <rtl/math.h>
+#include <sax/fastattribs.hxx>
+#include <utility>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::xml;
+using namespace ::com::sun::star::xml::sax;
+namespace sax_fastparser
+{
+
+// out-of-line empty destructor: wastage to keep MSVC happy vs. an in-line {}
+FastTokenHandlerBase::~FastTokenHandlerBase()
+{
+}
+
+UnknownAttribute::UnknownAttribute( OUString aNamespaceURL, OString aName, OString value ) // namespaced attribute; the by-value arguments are moved into the members
+ : maNamespaceURL(std::move( aNamespaceURL )), maName(std::move( aName )), maValue(std::move( value ))
+{
+}
+
+UnknownAttribute::UnknownAttribute( OString aName, OString value ) // attribute without namespace: maNamespaceURL is left default-constructed
+ : maName(std::move( aName )), maValue(std::move( value ))
+{
+}
+
+void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const // copies this attribute into *pAttrib; does nothing if pAttrib is null
+{
+ if( pAttrib )
+ {
+ pAttrib->Name = OStringToOUString( maName, RTL_TEXTENCODING_UTF8 ); // name and value are stored as 8-bit strings and decoded as UTF-8
+ pAttrib->NamespaceURL = maNamespaceURL;
+ pAttrib->Value = OStringToOUString( maValue, RTL_TEXTENCODING_UTF8 );
+ }
+}
+
+FastAttributeList::FastAttributeList( sax_fastparser::FastTokenHandlerBase *pTokenHandler) // creates an empty list that stores (does not copy) pTokenHandler
+: mpTokenHandler( pTokenHandler )
+{
+ // random initial size of buffer to store attribute values
+ mnChunkLength = 58;
+ mpChunk = static_cast<char *>(malloc( mnChunkLength )); // NOTE(review): malloc result is not checked here, unlike the realloc in add()
+ maAttributeValues.push_back( 0 ); // offset of the first value; the vector always holds one more entry than there are tokens
+}
+
+FastAttributeList::FastAttributeList( const css::uno::Reference< css::xml::sax::XFastAttributeList > & xAttrList ) // copies the list behind xAttrList, including its value buffer
+{
+ const auto& rOther = castToFastAttributeList(xAttrList);
+ mpTokenHandler = rOther.mpTokenHandler; // the handler pointer itself is copied, not the handler
+ mpChunk = static_cast<char *>(malloc( rOther.mnChunkLength )); // NOTE(review): malloc result is not checked before the memcpy below
+ mnChunkLength = rOther.mnChunkLength;
+ memcpy(mpChunk, rOther.mpChunk, rOther.mnChunkLength); // copies the whole chunk, so the copied offsets stay valid
+ maAttributeValues = rOther.maAttributeValues;
+ maAttributeTokens = rOther.maAttributeTokens;
+ maUnknownAttributes = rOther.maUnknownAttributes;
+}
+
+css::uno::Reference< ::css::util::XCloneable > FastAttributeList::createClone() // returns a newly allocated FastAttributeList constructed from this one
+{
+ return new FastAttributeList(this); // presumably resolves to the constructor taking a Reference<XFastAttributeList> - confirm against the header
+}
+
+FastAttributeList::~FastAttributeList() // releases the malloc'ed value buffer
+{
+ free( mpChunk );
+}
+
+void FastAttributeList::clear() // removes all known and unknown attributes; mpChunk is neither freed nor shrunk here
+{
+ maAttributeTokens.clear();
+ maAttributeValues.resize(1); // keep only the leading start offset
+ assert(maAttributeValues[0] == 0);
+ maUnknownAttributes.clear();
+}
+
+void FastAttributeList::add( sal_Int32 nToken, const char* pValue, size_t nValueLength ) // appends a token with a copy of nValueLength bytes from pValue; throws std::bad_alloc if the buffer cannot grow
+{
+ assert(nToken != -1);
+ assert(nToken != 0);
+ assert(nValueLength < SAL_MAX_INT32); // protect against absurd values
+ maAttributeTokens.push_back( nToken );
+ sal_Int32 nWritePosition = maAttributeValues.back(); // the new value starts where the previous one ended
+ maAttributeValues.push_back( maAttributeValues.back() + nValueLength + 1 ); // +1 for the terminating NUL
+ if (maAttributeValues.back() > mnChunkLength)
+ {
+ const sal_Int32 newLen = std::max(mnChunkLength * 2, maAttributeValues.back()); // at least double, or more if one value needs it
+ auto p = static_cast<char*>(realloc(mpChunk, newLen));
+ if (!p)
+ throw std::bad_alloc();
+
+ mnChunkLength = newLen;
+ mpChunk = p;
+
+ }
+ memcpy(mpChunk + nWritePosition, pValue, nValueLength);
+ mpChunk[nWritePosition + nValueLength] = '\0';
+}
+
+void FastAttributeList::add( sal_Int32 nToken, const char* pValue ) // pValue is measured with strlen, so it must be a non-null NUL-terminated string
+{
+ add( nToken, pValue, strlen( pValue ));
+}
+
+void FastAttributeList::add( sal_Int32 nToken, const OString& rValue ) // adds the bytes of rValue as the value of nToken
+{
+ add( nToken, rValue.getStr(), rValue.getLength() );
+}
+
+void FastAttributeList::add(sal_Int32 nToken, std::u16string_view sValue) // converts sValue to UTF-8 and adds it as the value of nToken
+{
+ add(nToken, OUStringToOString(sValue, RTL_TEXTENCODING_UTF8));
+}
+
+void FastAttributeList::addNS( sal_Int32 nNamespaceToken, sal_Int32 nToken, const OString& rValue ) // adds rValue under the combined namespace+name token
+{
+ sal_Int32 nCombinedToken = (nNamespaceToken << 16) | nToken; // namespace in the upper 16 bits, element token in the lower bits
+ add( nCombinedToken, rValue );
+}
+
+void FastAttributeList::addNS(sal_Int32 nNamespaceToken, sal_Int32 nToken, // UTF-16 overload: adds sValue under the combined namespace+name token
+ std::u16string_view sValue)
+{
+ sal_Int32 nCombinedToken = (nNamespaceToken << 16) | nToken; // namespace in the upper 16 bits, element token in the lower bits
+ add(nCombinedToken, sValue);
+}
+
+void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& value ) // appends a namespaced attribute to maUnknownAttributes
+{
+ maUnknownAttributes.emplace_back( rNamespaceURL, rName, value );
+}
+
+void FastAttributeList::addUnknown( const OString& rName, const OString& value ) // appends an attribute without namespace to maUnknownAttributes
+{
+ maUnknownAttributes.emplace_back( rName, value );
+}
+
+void FastAttributeList::add( const css::uno::Reference<css::xml::sax::XFastAttributeList>& xAttrList ) // appends every attribute of the list behind xAttrList
+{
+ const auto& rOther = castToFastAttributeList(xAttrList);
+ add(rOther);
+}
+
+void FastAttributeList::add( const FastAttributeList& rOther ) // appends all known and unknown attributes of rOther; existing entries are kept
+{
+ for (size_t i=0; i < rOther.maAttributeTokens.size(); ++i)
+ add(rOther.maAttributeTokens[i], rOther.getFastAttributeValue(i), rOther.AttributeValueLength(i));
+ for (const auto & i : rOther.maUnknownAttributes)
+ addUnknown(i.maNamespaceURL, i.maName, i.maValue);
+}
+
+// XFastAttributeList
+sal_Bool FastAttributeList::hasAttribute( ::sal_Int32 Token ) // true if Token is among the stored attribute tokens (linear search)
+{
+ for (sal_Int32 i : maAttributeTokens)
+ if (i == Token)
+ return true;
+
+ return false;
+}
+
+sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) // returns the token for the value of attribute Token; throws SAXException if the attribute is absent
+{
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token) // first match wins
+ return FastTokenHandlerBase::getTokenFromChars(
+ mpTokenHandler,
+ getFastAttributeValue(i),
+ AttributeValueLength( i ) );
+
+ throw SAXException("FastAttributeList::getValueToken: unknown token " + OUString::number(Token), nullptr, Any());
+}
+
+sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int32 Default ) // like getValueToken, but returns Default instead of throwing when the attribute is absent
+{
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token) // first match wins
+ return FastTokenHandlerBase::getTokenFromChars(
+ mpTokenHandler,
+ getFastAttributeValue(i),
+ AttributeValueLength( i ) );
+
+ return Default;
+}
+
+// performance sensitive shortcuts to avoid allocation ...
+bool FastAttributeList::getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const // returns true if nToken is present; rInt gets its value parsed as decimal, else 0
+{
+ rInt = 0;
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == nToken)
+ {
+ sal_Int64 n = rtl_str_toInt64_WithLength( getFastAttributeValue(i), 10, AttributeValueLength(i) );
+ if (n < SAL_MIN_INT32 || n > SAL_MAX_INT32) { // values outside the 32-bit range become 0, but still count as found
+ n = 0;
+ }
+ rInt = n;
+ return true;
+ }
+ return false;
+}
+
+sal_Int32 FastAttributeList::getAsIntegerByIndex( sal_Int32 nTokenIndex ) const // parses the value at position nTokenIndex as decimal; the index is not range-checked here
+{
+ sal_Int64 n = rtl_str_toInt64_WithLength( getFastAttributeValue(nTokenIndex), 10, AttributeValueLength(nTokenIndex) );
+ if (n < SAL_MIN_INT32 || n > SAL_MAX_INT32) { // values outside the 32-bit range become 0
+ n = 0;
+ }
+ return n;
+}
+
+bool FastAttributeList::getAsDouble( sal_Int32 nToken, double &rDouble) const // returns true if nToken is present; rDouble gets its parsed value, else 0.0
+{
+ rDouble = 0.0;
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == nToken)
+ {
+ auto const p = getFastAttributeValue(i);
+ rDouble = rtl_math_stringToDouble( p, p + AttributeValueLength(i), '.', 0, nullptr, nullptr ); // '.' as decimal separator; status and end-pointer outputs are not requested
+ return true;
+ }
+ return false;
+}
+
+bool FastAttributeList::getAsChar( sal_Int32 nToken, const char*& rPos ) const // returns true if nToken is present and points rPos at its value inside mpChunk; rPos is untouched otherwise
+{
+ for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i)
+ {
+ if (maAttributeTokens[i] != nToken)
+ continue;
+
+ sal_Int32 nOffset = maAttributeValues[i]; // start offset of the i-th value
+ rPos = mpChunk + nOffset; // points into the list's own buffer, which add() may realloc
+ return true;
+ }
+
+ return false;
+}
+
+const char* FastAttributeList::getAsCharByIndex( sal_Int32 nTokenIndex ) const // returns a pointer into mpChunk for the value at nTokenIndex; the index is not range-checked here
+{
+ sal_Int32 nOffset = maAttributeValues[nTokenIndex];
+ return mpChunk + nOffset;
+}
+
+OUString FastAttributeList::getValue( ::sal_Int32 Token ) // returns the value of attribute Token decoded from UTF-8; throws SAXException if the attribute is absent
+{
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token) // first match wins
+ return OUString( getFastAttributeValue(i), AttributeValueLength(i), RTL_TEXTENCODING_UTF8 );
+
+ throw SAXException("FastAttributeList::getValue: unknown token " + OUString::number(Token), nullptr, Any());
+}
+
+OUString FastAttributeList::getValueByIndex( ::sal_Int32 nTokenIndex ) const // returns the value at position nTokenIndex decoded from UTF-8
+{
+ return OUString( getFastAttributeValue(nTokenIndex), AttributeValueLength(nTokenIndex), RTL_TEXTENCODING_UTF8 );
+}
+
+OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token ) // like getValue, but returns an empty string instead of throwing when the attribute is absent
+{
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if (maAttributeTokens[i] == Token) // first match wins
+ return OUString( getFastAttributeValue(i), AttributeValueLength(i), RTL_TEXTENCODING_UTF8 );
+
+ return OUString();
+}
+Sequence< Attribute > FastAttributeList::getUnknownAttributes( ) // returns all unknown attributes converted to Attribute structs, in insertion order
+{
+ auto nSize = maUnknownAttributes.size();
+ if (nSize == 0)
+ return {}; // nothing to convert: return an empty sequence
+ Sequence< Attribute > aSeq( nSize );
+ Attribute* pAttr = aSeq.getArray();
+ for( const auto& rAttr : maUnknownAttributes )
+ rAttr.FillAttribute( pAttr++ );
+ return aSeq;
+}
+Sequence< FastAttribute > FastAttributeList::getFastAttributes( ) // returns all known attributes as (token, value) pairs with values decoded from UTF-8
+{
+ Sequence< FastAttribute > aSeq( maAttributeTokens.size() );
+ FastAttribute* pAttr = aSeq.getArray();
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ {
+ pAttr->Token = maAttributeTokens[i];
+ pAttr->Value = OUString( getFastAttributeValue(i), AttributeValueLength(i), RTL_TEXTENCODING_UTF8 );
+ pAttr++;
+ }
+ return aSeq;
+}
+
+FastAttributeList::FastAttributeIter FastAttributeList::find( sal_Int32 nToken ) const // returns an iterator to the first attribute with nToken, or end() if there is none
+{
+ for (size_t i = 0; i < maAttributeTokens.size(); ++i)
+ if( maAttributeTokens[i] == nToken )
+ return FastAttributeIter(*this, i);
+ return end();
+}
+
+sal_Int32 FastTokenHandlerBase::getTokenFromChars( // returns pTokenHandler->getTokenDirect() for the given characters
+ const FastTokenHandlerBase *pTokenHandler, // dereferenced unconditionally, so it must not be null
+ const char *pToken, size_t nLen /* = 0 */ )
+{
+ sal_Int32 nRet;
+
+ if( !nLen ) // a length of 0 means: measure pToken with strlen
+ nLen = strlen( pToken );
+
+ nRet = pTokenHandler->getTokenDirect( pToken, static_cast<sal_Int32>(nLen) );
+
+ return nRet;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
new file mode 100644
index 000000000..b4b9de8fb
--- /dev/null
+++ b/sax/source/tools/fastserializer.cxx
@@ -0,0 +1,845 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include "fastserializer.hxx"
+
+#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
+#include <rtl/math.h>
+#include <sal/log.hxx>
+#include <comphelper/processfactory.hxx>
+#include <comphelper/sequence.hxx>
+
+#include <cassert>
+#include <optional>
+#include <string.h>
+#include <string_view>
+#include <utility>
+
+#if OSL_DEBUG_LEVEL > 0
+#include <iostream>
+#include <set>
+#endif
+
+using ::std::vector;
+using ::com::sun::star::uno::Sequence;
+using ::com::sun::star::io::XOutputStream;
+
+#define HAS_NAMESPACE(x) ((x & 0xffff0000) != 0) // true if any of the upper 16 bits of the token are set
+#define NAMESPACE(x) (x >> 16) // upper part of a combined token
+#define TOKEN(x) (x & 0xffff) // lower 16 bits of a combined token
+// number of characters without terminating 0
+#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
+
+const char sClosingBracket[] = ">"; // fixed markup fragments emitted by the serializer
+const char sSlashAndClosingBracket[] = "/>";
+constexpr OStringLiteral sColon = ":";
+const char sOpeningBracket[] = "<";
+const char sOpeningBracketAndSlash[] = "</";
+const char sQuote[] = "\"";
+const char sEqualSignAndQuote[] = "=\"";
+const char sSpace[] = " ";
+const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"; // written by startDocument()
+
+namespace sax_fastparser {
+ FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) // sets up the serializer to write to xOutputStream, which must be a valid reference
+ : mbMarkStackEmpty(true)
+ , mpDoubleStr(nullptr)
+ , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
+ , mbXescape(true)
+ {
+ rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity); // reusable scratch string for write(double)
+ mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
+ ::comphelper::getProcessComponentContext());
+ assert(xOutputStream.is()); // cannot do anything without that
+ maCachedOutputStream.setOutputStream( xOutputStream );
+ }
+
+ FastSaxSerializer::~FastSaxSerializer() // releases the scratch string allocated in the constructor
+ {
+ rtl_string_release(mpDoubleStr);
+ }
+
+ void FastSaxSerializer::startDocument() // writes the XML declaration (sXmlHeader) without its terminating NUL
+ {
+ writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
+ }
+
+ void FastSaxSerializer::write( double value ) // formats value into the reusable mpDoubleStr and writes the resulting text
+ {
+ rtl_math_doubleToString(
+ &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
+ RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr, // '.' as decimal separator
+ 0, true);
+
+ write(mpDoubleStr->buffer, mpDoubleStr->length);
+ // and "clear" the string
+ mpDoubleStr->length = 0;
+ mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
+ }
+
+ void FastSaxSerializer::write( std::u16string_view sOutput, bool bEscape ) // converts sOutput to UTF-8 and forwards to the OString overload
+ {
+ write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape );
+
+ }
+
+ void FastSaxSerializer::write( const OString& sOutput, bool bEscape ) // forwards the bytes of sOutput to the pointer+length overload
+ {
+ write( sOutput.getStr(), sOutput.getLength(), bEscape );
+ }
+
+ /** Detects characters not allowed in XML 1.0 at string[index]; returns an empty optional for
+ an allowed byte, else the pair (code point, bytes to skip). XML 1.1 would exclude only U+0000
+
+ This assumes that `string` is UTF-8, which appears to generally be the case: The only
+ user of this FastSaxSerializer code is FastSerializerHelper, and when its constructor
+ (sax/source/tools/fshelper.cxx) is called with bWriteHeader being true, it calls
+ FastSaxSerializer::startDocument, which writes sXmlHeader claiming encoding="UTF-8". The
+ only place that appears to construct FastSerializerHelper appears to be
+ XmlFilterBase::openFragmentStreamWithSerializer (oox/source/core/xmlfilterbase.cxx), and it
+ only passes false for bWriteHeader when the given rMediaType contains "vml" but not "+xml"
+ (see <https://git.libreoffice.org/core/+/6a11add2c4ea975356cfb7bab02301788c79c904%5E!/>
+ "XLSX VML Export fixes", stating "Don't write xml headers for vml files"). But let's assume
+ that even such Vector Markup Language files are written as UTF-8.
+ */
+ template<typename Int> static std::optional<std::pair<unsigned, Int>> invalidChar(
+ char const * string, Int length, Int index )
+ {
+ assert(index < length);
+ auto const c = string[index];
+
+ if (static_cast<unsigned char>(c) >= 0x20 && c != '\xEF') // fast path: anything from 0x20 up is fine, except a possible U+FFFE/U+FFFF lead byte
+ return {};
+
+ switch (c)
+ {
+ case 0x09: // tab, line feed and carriage return are the allowed control characters
+ case 0x0a:
+ case 0x0d:
+ return {};
+ case '\xEF': // U+FFFE, U+FFFF:
+ if (length - index >= 3 && string[index + 1] == '\xBF') {
+ switch (string[index + 2]) {
+ case '\xBE':
+ return std::pair(0xFFFE, 3); // three bytes: EF BF BE
+ case '\xBF':
+ return std::pair(0xFFFF, 3); // three bytes: EF BF BF
+ }
+ }
+ return {}; // any other sequence starting with 0xEF is accepted
+ }
+ return std::pair(static_cast<unsigned char>(c), 1); // remaining control characters below 0x20
+ }
+
+ static bool isHexDigit( char c ) // true for ASCII 0-9, A-F and a-f
+ {
+ return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
+ }
+
+ void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape ) // writes nLen bytes of pStr, XML-escaped if bEscape; nLen == -1 means "use strlen"
+ {
+ if (nLen == -1)
+ nLen = pStr ? strlen(pStr) : 0; // a null pStr with nLen == -1 is treated as empty
+
+ if (!bEscape)
+ {
+ writeBytes( pStr, nLen );
+ return;
+ }
+
+ bool bGood = true; // cleared only in debug builds, when an invalid character is written unescaped
+ const sal_Int32 kXescapeLen = 7; // length of "_xHHHH_"
+ char bufXescape[kXescapeLen+1];
+ sal_Int32 nNextXescape = 0; // index before which a '_' must not start another _xHHHH_ escape
+ for (sal_Int32 i = 0; i < nLen;)
+ {
+ char c = pStr[ i ];
+ switch( c )
+ {
+ case '<': writeBytes( "&lt;", 4 ); break;
+ case '>': writeBytes( "&gt;", 4 ); break;
+ case '&': writeBytes( "&amp;", 5 ); break;
+ case '\'': writeBytes( "&apos;", 6 ); break;
+ case '"': writeBytes( "&quot;", 6 ); break;
+ case '\t':
+#if 0
+ // Seems OOXML prefers the _xHHHH_ escape over the
+ // entity in *some* cases, apparently in attribute
+ // values but not in element data.
+ // Would need to distinguish at a higher level.
+ if (mbXescape)
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ static_cast<unsigned int>(static_cast<unsigned char>(c)));
+ writeBytes( bufXescape, kXescapeLen);
+ }
+ else
+#endif
+ {
+ writeBytes( "&#9;", 4 );
+ }
+ break;
+ case '\n':
+#if 0
+ if (mbXescape)
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ static_cast<unsigned int>(static_cast<unsigned char>(c)));
+ writeBytes( bufXescape, kXescapeLen);
+ }
+ else
+#endif
+ {
+ writeBytes( "&#10;", 5 );
+ }
+ break;
+ case '\r':
+#if 0
+ if (mbXescape)
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ static_cast<unsigned int>(static_cast<unsigned char>(c)));
+ writeBytes( bufXescape, kXescapeLen);
+ }
+ else
+#endif
+ {
+ writeBytes( "&#13;", 5 );
+ }
+ break;
+ default:
+ if (mbXescape)
+ {
+ char c1, c2, c3, c4;
+ // Escape characters not valid in XML 1.0 as
+ // _xHHHH_. A literal "_xHHHH_" has to be
+ // escaped as _x005F_xHHHH_ (effectively
+ // escaping the leading '_').
+ // See ECMA-376-1:2016 page 3736,
+ // 22.4.2.4 bstr (Basic String)
+ // for reference.
+ if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen && // the bound guarantees pStr[i+6] is in range
+ pStr[i+6] == '_' &&
+ ((pStr[i+1] | 0x20) == 'x') && // | 0x20 folds 'X' to 'x'
+ isHexDigit( c1 = pStr[i+2] ) &&
+ isHexDigit( c2 = pStr[i+3] ) &&
+ isHexDigit( c3 = pStr[i+4] ) &&
+ isHexDigit( c4 = pStr[i+5] ))
+ {
+ // OOXML has the odd habit to write some
+ // names using this that when re-saving
+ // should *not* be escaped, specifically
+ // _x0020_ for blanks in w:xpath values.
+ if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
+ {
+ // When encountering "_x005F_xHHHH_"
+ // assume that is an already escaped
+ // sequence that was not unescaped and
+ // shall be written as is, to not end
+ // up with "_x005F_x005F_xHHHH_" and
+ // repeated...
+ if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
+ i + kXescapeLen <= nLen - 6 && // room for the following "xHHHH_"
+ pStr[i+kXescapeLen+5] == '_' &&
+ ((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
+ isHexDigit( pStr[i+kXescapeLen+1] ) &&
+ isHexDigit( pStr[i+kXescapeLen+2] ) &&
+ isHexDigit( pStr[i+kXescapeLen+3] ) &&
+ isHexDigit( pStr[i+kXescapeLen+4] ))
+ {
+ writeBytes( &c, 1 ); // emit the '_' unchanged
+ // Remember this fake escapement.
+ nNextXescape = i + kXescapeLen + 6;
+ }
+ else
+ {
+ writeBytes( "_x005F_", kXescapeLen); // replaces the leading '_'; the rest is written by later iterations
+ // Remember this escapement so in
+ // _xHHHH_xHHHH_ only the first '_'
+ // is escaped.
+ nNextXescape = i + kXescapeLen;
+ }
+ break;
+ }
+ }
+ if (auto const inv = invalidChar(pStr, nLen, i))
+ {
+ snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
+ inv->first);
+ writeBytes( bufXescape, kXescapeLen);
+ i += inv->second; // skip all bytes of the invalid character
+ continue; // bypasses the ++i at the end of the loop body
+ }
+ }
+#if OSL_DEBUG_LEVEL > 0
+ else
+ {
+ if (bGood && invalidChar(pStr, nLen, i))
+ {
+ bGood = false;
+ // The SAL_WARN() for the single character is
+ // issued in writeBytes(), just gather for the
+ // SAL_WARN_IF() below.
+ }
+ }
+#endif
+ writeBytes( &c, 1 ); // ordinary byte: copied through unchanged
+ break;
+ }
+ ++i;
+ }
+ SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+ }
+
+ // Finishes the document: asserts that no mark() is still outstanding and
+ // then flushes whatever maCachedOutputStream is still holding.
+ void FastSaxSerializer::endDocument()
+ {
+ assert(mbMarkStackEmpty && maMarkStack.empty());
+ maCachedOutputStream.flush();
+ }
+
+ // Writes the UTF-8 identifier of a token as delivered by the token handler.
+ // A token carrying a namespace part is written as namespace identifier,
+ // sColon, local identifier; any other token is written as a single
+ // identifier. Each lookup is asserted to yield a non-empty sequence.
+ void FastSaxSerializer::writeId( ::sal_Int32 nElement )
+ {
+     if( !HAS_NAMESPACE( nElement ) )
+     {
+         // No namespace part: the token maps directly to one identifier.
+         auto const aName(mxFastTokenHandler->getUTF8Identifier(nElement));
+         assert(aName.hasElements());
+         writeBytes(aName);
+         return;
+     }
+
+     auto const aPrefix(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
+     assert(aPrefix.hasElements());
+     writeBytes(aPrefix);
+
+     writeBytes(sColon.getStr(), sColon.getLength());
+
+     auto const aLocalName(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
+     assert(aLocalName.hasElements());
+     writeBytes(aLocalName);
+ }
+
+#ifdef DBG_UTIL
+ // Debug helper: returns as an OString the same identifier text that
+ // writeId() would write for nElement (namespace identifier + sColon +
+ // local identifier, or just the identifier for a token without namespace).
+ OString FastSaxSerializer::getId( ::sal_Int32 nElement )
+ {
+     if (!HAS_NAMESPACE(nElement))
+     {
+         Sequence<sal_Int8> const aPlain(
+             mxFastTokenHandler->getUTF8Identifier(nElement));
+         return OString(reinterpret_cast<char const*>(aPlain.getConstArray()), aPlain.getLength());
+     }
+
+     Sequence<sal_Int8> const aPrefix(
+         mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
+     Sequence<sal_Int8> const aLocal(
+         mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
+     return std::string_view(
+                reinterpret_cast<char const*>(aPrefix.getConstArray()), aPrefix.getLength())
+            + sColon
+            + std::string_view(
+                reinterpret_cast<char const*>(aLocal.getConstArray()), aLocal.getLength());
+ }
+#endif
+
+ // Writes the start tag of Element: opening bracket, identifier, attributes,
+ // closing bracket. Attributes come from pAttrList when one is given,
+ // otherwise from the token/value pairs queued in maTokenValues.
+ void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
+ {
+     const bool bInsideMark = !mbMarkStackEmpty;
+     if ( bInsideMark )
+     {
+         // Flush first so already cached bytes reach the current mark before
+         // the mark is told about the new element.
+         maCachedOutputStream.flush();
+         maMarkStack.top()->setCurrentElement( Element );
+     }
+
+#ifdef DBG_UTIL
+     // Record the open element for the well-formedness checks.
+     if (bInsideMark)
+         maMarkStack.top()->m_DebugStartedElements.push_back(Element);
+     else
+         m_DebugStartedElements.push(Element);
+#endif
+
+     writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
+     writeId(Element);
+
+     if (!pAttrList)
+         writeTokenValueList();
+     else
+         writeFastAttributeList(*pAttrList);
+
+     writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
+ }
+
+ // Writes the end tag of Element (sOpeningBracketAndSlash, identifier,
+ // sClosingBracket). In DBG_UTIL builds it first checks that the end tag
+ // matches the most recently started element, either on the serializer's own
+ // stack or on the bookkeeping of the topmost mark.
+ void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
+ {
+#ifdef DBG_UTIL
+ // Well-formedness constraint: Element Type Match
+ if (mbMarkStackEmpty)
+ {
+ assert(!m_DebugStartedElements.empty());
+ assert(Element == m_DebugStartedElements.top());
+ m_DebugStartedElements.pop();
+ }
+ else
+ {
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+ {
+ // Sort is always well-formed fragment
+ assert(!maMarkStack.top()->m_DebugStartedElements.empty());
+ }
+ // An end tag without a start tag inside this mark is remembered as
+ // pending; it gets matched when the mark is merged.
+ if (maMarkStack.top()->m_DebugStartedElements.empty())
+ {
+ maMarkStack.top()->m_DebugEndedElements.push_back(Element);
+ }
+ else
+ {
+ assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
+ maMarkStack.top()->m_DebugStartedElements.pop_back();
+ }
+ }
+#endif
+
+ writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
+
+ writeId(Element);
+
+ writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
+ }
+
+ // Writes Element as a single tag: sOpeningBracket, identifier, attributes,
+ // sSlashAndClosingBracket. Attributes come from pAttrList when one is
+ // given, otherwise from the queued token/value pairs.
+ void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
+ {
+     if ( !mbMarkStackEmpty )
+     {
+         // Flush before the topmost mark learns about the new element.
+         maCachedOutputStream.flush();
+         maMarkStack.top()->setCurrentElement( Element );
+     }
+
+     writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
+     writeId(Element);
+
+     if (!pAttrList)
+         writeTokenValueList();
+     else
+         writeFastAttributeList(*pAttrList);
+
+     writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
+ }
+
+ // Returns the output stream reference held by maCachedOutputStream.
+ css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const
+ {
+ return maCachedOutputStream.getOutputStream();
+ }
+
+ // Writes every queued token/value pair as an attribute (sSpace, identifier,
+ // sEqualSignAndQuote, escaped value, sQuote) and then empties the queue.
+ // DBG_UTIL builds assert that no attribute name occurs twice.
+ void FastSaxSerializer::writeTokenValueList()
+ {
+#ifdef DBG_UTIL
+     ::std::set<OString> aSeenNames;
+#endif
+     for (const TokenValue & rEntry : maTokenValues)
+     {
+         writeBytes(sSpace, N_CHARS(sSpace));
+         writeId(rEntry.nToken);
+
+#ifdef DBG_UTIL
+         // Well-formedness constraint: Unique Att Spec
+         OString const aName(getId(rEntry.nToken));
+         assert(aSeenNames.count(aName) == 0);
+         aSeenNames.insert(aName);
+#endif
+
+         writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
+         // Length -1 is passed on to write() together with bEscape = true.
+         write(rEntry.pValue, -1, true);
+         writeBytes(sQuote, N_CHARS(sQuote));
+     }
+     maTokenValues.clear();
+ }
+
+ // Writes all attributes of rAttrList (sSpace, identifier, sEqualSignAndQuote,
+ // value, sQuote). Values are escaped, except the special VML shape type ids
+ // (see tdf#117274). DBG_UTIL builds warn about and assert on duplicate
+ // attribute names.
+ void FastSaxSerializer::writeFastAttributeList(FastAttributeList const & rAttrList)
+ {
+#ifdef DBG_UTIL
+     ::std::set<OString> aSeenNames;
+#endif
+     const std::vector< sal_Int32 >& rTokens = rAttrList.getFastAttributeTokens();
+     const size_t nCount = rTokens.size();
+     for (size_t nIdx = 0; nIdx < nCount; ++nIdx)
+     {
+         writeBytes(sSpace, N_CHARS(sSpace));
+
+         const sal_Int32 nToken = rTokens[nIdx];
+         writeId(nToken);
+
+#ifdef DBG_UTIL
+         // Well-formedness constraint: Unique Att Spec
+         OString const aName(getId(nToken));
+         SAL_WARN_IF(aSeenNames.find(aName) != aSeenNames.end(), "sax", "Duplicate attribute: " << aName );
+         assert(aSeenNames.find(aName) == aSeenNames.end());
+         aSeenNames.insert(aName);
+#endif
+
+         writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
+
+         const char* pValue = rAttrList.getFastAttributeValue(nIdx);
+
+         // tdf#117274 don't escape the special VML shape type id "#_x0000_t202"
+         bool bIsVmlShapeTypeId = false;
+         if (pValue && *pValue != '\0')
+         {
+             if (*pValue == '#')
+                 bIsVmlShapeTypeId = strncmp(pValue, "#_x0000_t", 9) == 0;
+             else
+                 bIsVmlShapeTypeId = strncmp(pValue, "_x0000_t", 8) == 0;
+         }
+         const bool bEscape = !bIsVmlShapeTypeId;
+
+         write(pValue, rAttrList.AttributeValueLength(nIdx), bEscape);
+
+         writeBytes(sQuote, N_CHARS(sQuote));
+     }
+ }
+
+ // Pushes a new mark tagged nTag and redirects the cached output to it.
+ // A non-empty rOrder creates a ForSort mark, an empty one a plain ForMerge.
+ void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder)
+ {
+     std::shared_ptr<ForMerge> pNewMark;
+     if (!rOrder.hasElements())
+         pNewMark = std::make_shared<ForMerge>(nTag);
+     else
+         pNewMark = std::make_shared<ForSort>(nTag, rOrder);
+
+     maMarkStack.push( pNewMark );
+     maCachedOutputStream.setOutput( pNewMark );
+     mbMarkStackEmpty = false;
+ }
+
+#ifdef DBG_UTIL
+ // Debug bookkeeping for appending the "right" fragment after the "left" one.
+ // Pending end tags of the right fragment close started elements of the left
+ // fragment (asserted to match) or, if none are open, become pending end tags
+ // of the left fragment. Started elements of the right fragment are then
+ // appended to the left fragment's started elements. Both right-hand deques
+ // end up empty.
+ static void lcl_DebugMergeAppend(
+     std::deque<sal_Int32> & rLeftEndedElements,
+     std::deque<sal_Int32> & rLeftStartedElements,
+     std::deque<sal_Int32> & rRightEndedElements,
+     std::deque<sal_Int32> & rRightStartedElements)
+ {
+     for (; !rRightEndedElements.empty(); rRightEndedElements.pop_front())
+     {
+         const sal_Int32 nEnded = rRightEndedElements.front();
+         if (!rLeftStartedElements.empty())
+         {
+             assert(rLeftStartedElements.back() == nEnded);
+             rLeftStartedElements.pop_back();
+         }
+         else
+         {
+             rLeftEndedElements.push_back(nEnded);
+         }
+     }
+
+     for (; !rRightStartedElements.empty(); rRightStartedElements.pop_front())
+         rLeftStartedElements.push_back(rRightStartedElements.front());
+ }
+
+ // Debug bookkeeping for placing the "left" fragment in front of the "right"
+ // one. Started elements of the left fragment are consumed back to front:
+ // each is matched against the first pending end tag of the right fragment
+ // (asserted equal) or, if there is none, pushed to the front of the right
+ // fragment's started elements. Pending end tags of the left fragment are
+ // then moved to the front of the right fragment's pending end tags. Both
+ // left-hand deques end up empty.
+ static void lcl_DebugMergePrepend(
+     std::deque<sal_Int32> & rLeftEndedElements,
+     std::deque<sal_Int32> & rLeftStartedElements,
+     std::deque<sal_Int32> & rRightEndedElements,
+     std::deque<sal_Int32> & rRightStartedElements)
+ {
+     for (; !rLeftStartedElements.empty(); rLeftStartedElements.pop_back())
+     {
+         const sal_Int32 nStarted = rLeftStartedElements.back();
+         if (!rRightEndedElements.empty())
+         {
+             assert(rRightEndedElements.front() == nStarted);
+             rRightEndedElements.pop_front();
+         }
+         else
+         {
+             rRightStartedElements.push_front(nStarted);
+         }
+     }
+
+     for (; !rLeftEndedElements.empty(); rLeftEndedElements.pop_back())
+         rRightEndedElements.push_front(rLeftEndedElements.back());
+ }
+#endif
+
+ // Pops the topmost mark and merges its data.
+ //
+ // If it is the only mark, its data is written to the real output stream and
+ // eMergeType is not consulted. Otherwise the data is appended to, prepended
+ // to, or postponed on the mark below, depending on eMergeType.
+ //
+ // nTag is only used to assert that it equals the tag given to the matching
+ // mark(). DBG_UTIL builds additionally carry the started/ended element
+ // bookkeeping along with the merged data.
+ void FastSaxSerializer::mergeTopMarks(
+ sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType)
+ {
+ SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
+ assert(!mbMarkStackEmpty); // should never happen
+ if ( mbMarkStackEmpty )
+ return;
+
+ assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!");
+ (void) nTag;
+#ifdef DBG_UTIL
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
+ {
+ // Sort is always well-formed fragment
+ assert(maMarkStack.top()->m_DebugStartedElements.empty());
+ assert(maMarkStack.top()->m_DebugEndedElements.empty());
+ }
+ // Fold the bookkeeping of the postponed data into the regular one, in the
+ // same order in which getData() appends maPostponed to maData.
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ maMarkStack.top()->m_DebugPostponedEndedElements,
+ maMarkStack.top()->m_DebugPostponedStartedElements);
+#endif
+
+ // flush, so that we get everything in getData()
+ maCachedOutputStream.flush();
+
+ // Last mark: its content goes straight to the output stream.
+ if (maMarkStack.size() == 1)
+ {
+#ifdef DBG_UTIL
+ while (!maMarkStack.top()->m_DebugEndedElements.empty())
+ {
+ assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
+ maMarkStack.top()->m_DebugEndedElements.pop_front();
+ m_DebugStartedElements.pop();
+ }
+ while (!maMarkStack.top()->m_DebugStartedElements.empty())
+ {
+ m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
+ maMarkStack.top()->m_DebugStartedElements.pop_front();
+ }
+#endif
+ // Copy the data out before the mark is popped.
+ Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
+ maMarkStack.pop();
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
+ return;
+ }
+
+#ifdef DBG_UTIL
+ // Copies of the top mark's bookkeeping, taken before the mark is popped.
+ ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
+ ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
+#endif
+ const Int8Sequence aMerge( maMarkStack.top()->getData() );
+ maMarkStack.pop();
+#ifdef DBG_UTIL
+ switch (eMergeType)
+ {
+ case MergeMarks::APPEND:
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ break;
+ case MergeMarks::PREPEND:
+ // ForSort::prepend() forwards to append(), so the bookkeeping has to
+ // follow the append order for a ForSort mark.
+ if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
+ {
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ }
+ else
+ {
+ lcl_DebugMergePrepend(
+ topDebugEndedElements,
+ topDebugStartedElements,
+ maMarkStack.top()->m_DebugEndedElements,
+ maMarkStack.top()->m_DebugStartedElements);
+ }
+ break;
+ case MergeMarks::POSTPONE:
+ lcl_DebugMergeAppend(
+ maMarkStack.top()->m_DebugPostponedEndedElements,
+ maMarkStack.top()->m_DebugPostponedStartedElements,
+ topDebugEndedElements,
+ topDebugStartedElements);
+ break;
+ }
+#endif
+ // NOTE(review): the size() == 1 case returned above, so at least one mark
+ // remains after the pop and the empty() branch looks unreachable. The
+ // switch below calls top() unconditionally, which would not be valid on an
+ // empty stack anyway - confirm and consider replacing the branch with an
+ // assert.
+ if (maMarkStack.empty())
+ {
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ }
+ else
+ {
+ maCachedOutputStream.setOutput( maMarkStack.top() );
+ }
+
+ switch ( eMergeType )
+ {
+ case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break;
+ case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break;
+ case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
+ }
+ }
+
+ // Hands the whole content of rData to the cached output stream.
+ void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
+ {
+ maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
+ }
+
+ // Hands nLen bytes starting at pStr to the cached output stream.
+ //
+ // With OSL_DEBUG_LEVEL > 0 the bytes are scanned with invalidChar() first.
+ // Every hit is reported with SAL_WARN, and for input longer than one byte
+ // the first (at most 42) bytes are logged as context. The data is written
+ // unchanged either way.
+ void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
+ {
+#if OSL_DEBUG_LEVEL > 0
+     {
+         bool bGood = true;
+         for (size_t i=0; i < nLen;)
+         {
+             if (auto const inv = invalidChar(pStr, nLen, i))
+             {
+                 bGood = false;
+                 SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
+                     std::hex << inv->first);
+                 // inv->second is used as the step that skips the reported character.
+                 i += inv->second;
+                 continue;
+             }
+             ++i;
+         }
+         // Clamp while still in size_t and only then narrow: converting nLen
+         // to sal_Int32 first could turn a very large length into a negative
+         // one, which would then win the min() and reach OString.
+         SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr, static_cast<sal_Int32>(std::min<size_t>(nLen, 42))) << "'");
+     }
+#endif
+     maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
+ }
+
+ // Appends the postponed bytes to maData, empties maPostponed and returns a
+ // reference to maData.
+ FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
+ {
+ merge( maData, maPostponed, true );
+ maPostponed.realloc( 0 );
+
+ return maData;
+ }
+
+#if OSL_DEBUG_LEVEL > 0
+ // Debugging aid: dumps the elements of maData and maPostponed to std::cerr.
+ void FastSaxSerializer::ForMerge::print( )
+ {
+ std::cerr << "Data: ";
+ for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ )
+ {
+ std::cerr << maData[i];
+ }
+
+ std::cerr << "\nPostponed: ";
+ for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ )
+ {
+ std::cerr << maPostponed[i];
+ }
+
+ std::cerr << "\n";
+ }
+#endif
+
+ // Inserts rWhat in front of the bytes already collected in maData.
+ void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
+ {
+ merge( maData, rWhat, false );
+ }
+
+ // Adds rWhat after the bytes already collected in maData.
+ void FastSaxSerializer::ForMerge::append( const css::uno::Sequence<sal_Int8> &rWhat )
+ {
+ merge( maData, rWhat, true );
+ }
+
+ // Adds rWhat to maPostponed; getData() later appends that buffer to maData.
+ void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
+ {
+ merge( maPostponed, rWhat, true );
+ }
+
+ // Grows rTop by the length of rMerge and copies rMerge in: behind the old
+ // content when bAppend is true, in front of it otherwise. An empty rMerge
+ // leaves rTop untouched.
+ void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
+ {
+     const sal_Int32 nAddLen = rMerge.getLength();
+     if ( nAddLen <= 0 )
+         return;
+
+     const sal_Int32 nOldLen = rTop.getLength();
+     rTop.realloc( nOldLen + nAddLen );
+
+     sal_Int8* const pTop = rTop.getArray();
+     if ( !bAppend )
+     {
+         // Shift the old content back to make room at the front; the ranges
+         // overlap, hence memmove.
+         memmove( pTop + nAddLen, pTop, nOldLen );
+         memcpy( pTop, rMerge.getConstArray(), nAddLen );
+         return;
+     }
+
+     memcpy( pTop + nOldLen, rMerge.getConstArray(), nAddLen );
+ }
+
+ // Replaces maData with an empty sequence; maPostponed is not touched.
+ void FastSaxSerializer::ForMerge::resetData( )
+ {
+ maData = Int8Sequence();
+ }
+
+ // Makes nElement the element that subsequent append() calls collect into,
+ // but only if nElement is listed in maOrder; other tokens are ignored and
+ // the previous current element stays in effect. An empty buffer is created
+ // for an element seen for the first time.
+ void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
+ {
+     const auto & rKnownTokens = maOrder;
+     const bool bListed
+         = std::find( rKnownTokens.begin(), rKnownTokens.end(), nElement ) != rKnownTokens.end();
+     if( !bListed )
+         return;
+
+     mnCurrentElement = nElement;
+     // Inserts a default-constructed sequence only when the key is missing.
+     maData.try_emplace( nElement );
+ }
+
+ // For a sorted mark, prepend simply forwards to append(); the final order
+ // is produced by sort() from maOrder.
+ void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
+ {
+ append( rWhat );
+ }
+
+ // Appends rWhat to the buffer of the current element. operator[] creates
+ // the map entry for mnCurrentElement if it does not exist yet.
+ void FastSaxSerializer::ForSort::append( const css::uno::Sequence<sal_Int8> &rWhat )
+ {
+ merge( maData[mnCurrentElement], rWhat, true );
+ }
+
+ // Rebuilds the ForMerge data buffer by concatenating the per-element
+ // buffers in the order given by maOrder; tokens without a buffer are skipped.
+ void FastSaxSerializer::ForSort::sort()
+ {
+     // Start from an empty buffer so repeated calls do not duplicate items.
+     resetData();
+
+     for ( const auto nToken : std::as_const(maOrder) )
+     {
+         const auto aFound = maData.find( nToken );
+         if ( aFound == maData.end() )
+             continue;
+         ForMerge::append( aFound->second );
+     }
+ }
+
+ // Assembles the per-element buffers in maOrder order via sort() and returns
+ // the result of ForMerge::getData().
+ FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
+ {
+ sort( );
+ return ForMerge::getData();
+ }
+
+#if OSL_DEBUG_LEVEL > 0
+ // Debugging aid: dumps every per-element buffer to std::cerr, then runs
+ // sort() (which rebuilds the ForMerge data buffer) and prints the result
+ // via ForMerge::print().
+ void FastSaxSerializer::ForSort::print( )
+ {
+ for ( const auto& [rElement, rData] : maData )
+ {
+ std::cerr << "pair: " << rElement;
+ for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i )
+ std::cerr << rData[i];
+ std::cerr << "\n";
+ }
+
+ sort( );
+ ForMerge::print();
+ }
+#endif
+
+} // namespace sax_fastparser
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
new file mode 100644
index 000000000..109ada3c7
--- /dev/null
+++ b/sax/source/tools/fastserializer.hxx
@@ -0,0 +1,255 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX
+#define INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX
+
+#include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
+#include <com/sun/star/io/XOutputStream.hpp>
+
+#include <sax/fastattribs.hxx>
+#include <sax/fshelper.hxx>
+#include "CachedOutputStream.hxx"
+
+#include <stack>
+#include <string_view>
+#include <map>
+#include <memory>
+
+namespace sax_fastparser {
+
+/// One queued attribute: a token id and a pointer to its value text.
+/// The pointer is stored as given; the struct neither copies nor frees it.
+struct TokenValue
+{
+    sal_Int32 nToken;
+    const char *pValue;
+
+    TokenValue(sal_Int32 nTokenId, const char *pText)
+        : nToken(nTokenId)
+        , pValue(pText)
+    {}
+};
+using TokenValueList = std::vector<TokenValue>;
+
+/// Receives notification of sax document events to write into an XOutputStream.
+class FastSaxSerializer
+{
+    typedef css::uno::Sequence< ::sal_Int8 > Int8Sequence;
+    typedef css::uno::Sequence< ::sal_Int32 > Int32Sequence;
+
+public:
+    explicit FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream >& xOutputStream);
+    ~FastSaxSerializer();
+
+    // Not copyable: the class has a user-declared destructor and carries a
+    // raw rtl_String* (mpDoubleStr) plus the mark stack, so an implicit
+    // member-wise copy would leave two objects sharing the same raw buffer.
+    FastSaxSerializer(const FastSaxSerializer&) = delete;
+    FastSaxSerializer& operator=(const FastSaxSerializer&) = delete;
+
+    css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const;
+    /// called by FSHelper to put data in for writeTokenValueList
+    TokenValueList& getTokenValueList() { return maTokenValues; }
+
+    /** called by the parser when parsing of an XML stream is started.
+     */
+    void startDocument();
+
+    /** called by the parser after the last XML element of a stream is processed.
+     */
+    void endDocument();
+
+    /** receives notification of the beginning of an element.
+
+        @param Element
+            contains the integer token from the <type>XFastTokenHandler</type>
+            registered at the <type>XFastParser</type>.<br>
+
+            If the element has a namespace that was registered with the
+            <type>XFastParser</type>, <param>Element</param> contains the integer
+            token of the elements local name from the <type>XFastTokenHandler</type>
+            and the integer token of the namespace combined with an arithmetic
+            <b>or</b> operation.
+
+        @param pAttrList
+            Contains a <type>FastAttributeList</type> to access the attributes
+            from the element. When null, the attributes queued via
+            getTokenValueList() are written instead.
+
+    */
+    void startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr );
+
+    /** receives notification of the end of a known element.
+        @see startFastElement
+     */
+    void endFastElement( ::sal_Int32 Element );
+
+    /** receives notification of the beginning of a single element.
+
+        @param Element
+            contains the integer token from the <type>XFastTokenHandler</type>
+            registered at the <type>XFastParser</type>.<br>
+
+            If the element has a namespace that was registered with the
+            <type>XFastParser</type>, <param>Element</param> contains the integer
+            token of the elements local name from the <type>XFastTokenHandler</type>
+            and the integer token of the namespace combined with an arithmetic
+            <b>or</b> operation.
+
+        @param pAttrList
+            Contains a <type>FastAttributeList</type> to access the attributes
+            from the element. When null, the attributes queued via
+            getTokenValueList() are written instead.
+
+    */
+    void singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr );
+
+    // C++ helpers
+    /// Writes the identifier text of the given token.
+    void writeId( ::sal_Int32 Element );
+    /// Returns the identifier text of the given token (the definition is
+    /// only compiled when DBG_UTIL is defined).
+    OString getId( ::sal_Int32 Element );
+
+    void write( double value );
+    void write( std::u16string_view s, bool bEscape = false );
+    void write( const OString& s, bool bEscape = false );
+    void write( const char* pStr, sal_Int32 nLen, bool bEscape = false );
+
+public:
+    /** From now on, don't write directly to the stream, but to top of a stack.
+
+        This is to be able to change the order of the data being written.
+        If you need to write e.g.
+          p, r, rPr, [something], /rPr, t, [text], /t, /r, /p,
+        but get it in order
+          p, r, t, [text], /t, rPr, [something], /rPr, /r, /p,
+        simply do
+          p, r, mark(), t, [text], /t, mark(), rPr, [something], /rPr,
+          mergeTopMarks( MergeMarks::PREPEND ), mergeTopMarks( MergeMarks::APPEND ), /r, /p
+        and you are done.
+
+        @param nTag debugging aid to ensure mark and merge match in LIFO order
+        @param rOrder when non-empty, the mark collects output per element
+               token and emits it in this order (see ForSort)
+     */
+    void mark(sal_Int32 nTag, const Int32Sequence& rOrder);
+
+    /** Merge 2 topmost marks.
+
+        The possibilities: prepend the top before the second top-most
+        mark, append it, append it later or ignore; prepending brings the possibility
+        to switch parts of the output, appending later allows to write some
+        output in advance.
+
+        Writes the result to the output stream if the mark stack becomes empty
+        by the operation.
+
+        When the MergeMarks::POSTPONE is specified, the merge happens just
+        before the next merge.
+
+        @param nTag debugging aid to ensure mark and merge match in LIFO order
+
+        @see mark()
+     */
+    void mergeTopMarks(sal_Int32 nTag,
+        sax_fastparser::MergeMarks eMergeType);
+
+private:
+    /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append.
+     *  Its flush method needs to be called before touching maMarkStack
+     *  to ensure correct order of ForSort methods.
+     */
+    CachedOutputStream maCachedOutputStream;
+    css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler;
+
+    /// Buffer behind one mark(): collects bytes in maData, plus bytes that
+    /// were postponed and get appended when getData() is called.
+    class ForMerge : public ForMergeBase
+    {
+        Int8Sequence maData;
+        Int8Sequence maPostponed;
+
+    public:
+        sal_Int32 const m_Tag;
+#ifdef DBG_UTIL
+        // pending close tags, followed by pending open tags
+        std::deque<sal_Int32> m_DebugEndedElements;
+        std::deque<sal_Int32> m_DebugStartedElements;
+        // ... and another buffer for maPostponed ...
+        std::deque<sal_Int32> m_DebugPostponedEndedElements;
+        std::deque<sal_Int32> m_DebugPostponedStartedElements;
+#endif
+
+        explicit ForMerge(sal_Int32 const nTag) : m_Tag(nTag) {}
+
+        /// No-op here; ForSort overrides it to select the per-element buffer.
+        virtual void setCurrentElement( ::sal_Int32 /*nToken*/ ) {}
+        virtual Int8Sequence& getData();
+#if OSL_DEBUG_LEVEL > 0
+        virtual void print();
+#endif
+
+        virtual void prepend( const Int8Sequence &rWhat );
+        virtual void append( const css::uno::Sequence<sal_Int8> &rWhat ) override;
+        void postpone( const Int8Sequence &rWhat );
+
+    protected:
+        void resetData( );
+        static void merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend );
+    };
+
+    /// Mark that collects output separately per element token and emits the
+    /// pieces in the order given by maOrder.
+    class ForSort : public ForMerge
+    {
+        // Per-element buffers; distinct from the private ForMerge::maData.
+        std::map< ::sal_Int32, Int8Sequence > maData;
+        sal_Int32 mnCurrentElement;
+
+        Int32Sequence maOrder;
+
+    public:
+        ForSort(sal_Int32 const nTag, const Int32Sequence& rOrder)
+            : ForMerge(nTag)
+            , mnCurrentElement( 0 )
+            , maOrder( rOrder )
+        {}
+
+        void setCurrentElement( ::sal_Int32 nToken ) override;
+
+        virtual Int8Sequence& getData() override;
+
+#if OSL_DEBUG_LEVEL > 0
+        virtual void print() override;
+#endif
+
+        virtual void prepend( const Int8Sequence &rWhat ) override;
+        virtual void append( const css::uno::Sequence<sal_Int8> &rWhat ) override;
+    private:
+        void sort();
+    };
+
+    std::stack< std::shared_ptr< ForMerge > > maMarkStack;
+    /// Mirrors maMarkStack.empty(); kept in sync by mark() and mergeTopMarks().
+    bool mbMarkStackEmpty;
+    // Would be better to use OStringBuffer instead of these two
+    // but then we couldn't get the rtl_String* member :-(
+    rtl_String *mpDoubleStr;
+    sal_Int32 mnDoubleStrCapacity;
+    TokenValueList maTokenValues;
+    bool mbXescape;  ///< whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true)
+                     /* TODO: make that configurable from the outside for
+                      * some specific cases? */
+
+#ifdef DBG_UTIL
+    std::stack<sal_Int32> m_DebugStartedElements;
+#endif
+
+    void writeTokenValueList();
+    void writeFastAttributeList(FastAttributeList const & rAttrList);
+
+    /** Forward the call to the output stream, or write to the stack.
+
+        The latter in the case that we are inside a mark().
+     */
+    void writeBytes( const css::uno::Sequence< ::sal_Int8 >& aData );
+    void writeBytes( const char* pStr, size_t nLen );
+};
+
+} // namespace sax_fastparser
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/source/tools/fshelper.cxx b/sax/source/tools/fshelper.cxx
new file mode 100644
index 000000000..a560e5bd1
--- /dev/null
+++ b/sax/source/tools/fshelper.cxx
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sax/fshelper.hxx>
+#include "fastserializer.hxx"
+
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::uno;
+
+namespace sax_fastparser {
+
+// Creates the underlying FastSaxSerializer for xOutputStream and, when
+// bWriteHeader is set, immediately calls startDocument() on it.
+FastSerializerHelper::FastSerializerHelper(const Reference< io::XOutputStream >& xOutputStream, bool bWriteHeader ) :
+ mpSerializer(new FastSaxSerializer(xOutputStream))
+{
+ if( bWriteHeader )
+ mpSerializer->startDocument();
+}
+
+// Ends the document (which flushes the serializer's cached output) and then
+// destroys the serializer owned through the raw mpSerializer pointer.
+// NOTE(review): endDocument() runs inside a destructor; if the flush can
+// throw, that would happen during destruction - confirm this is acceptable.
+FastSerializerHelper::~FastSerializerHelper()
+{
+ mpSerializer->endDocument();
+ delete mpSerializer;
+}
+
+// Starts an element without an attribute list; the serializer then writes
+// the attributes queued through pushAttributeValue().
+void FastSerializerHelper::startElement(sal_Int32 elementTokenId)
+{
+ mpSerializer->startFastElement(elementTokenId);
+}
+// Queues an attribute for the next element. Only the pointer is stored, so
+// `value` has to stay valid until that element has been written.
+void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const char* value)
+{
+ mpSerializer->getTokenValueList().emplace_back(attribute, value);
+}
+// Queues an attribute for the next element. Only value.getStr() is stored,
+// not a copy of the string, so the OString passed in has to outlive the
+// call that writes the element.
+void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const OString& value)
+{
+ mpSerializer->getTokenValueList().emplace_back(attribute, value.getStr());
+}
+// Writes a single element without an attribute list; the serializer then
+// writes the attributes queued through pushAttributeValue().
+void FastSerializerHelper::singleElement(sal_Int32 elementTokenId)
+{
+ mpSerializer->singleFastElement(elementTokenId);
+}
+
+// Writes the end tag for elementTokenId.
+void FastSerializerHelper::endElement(sal_Int32 elementTokenId)
+{
+ mpSerializer->endFastElement(elementTokenId);
+}
+
+// Starts an element whose attributes are taken from xAttrList, which is
+// asserted to be non-null.
+void FastSerializerHelper::startElement(sal_Int32 elementTokenId, const rtl::Reference<FastAttributeList>& xAttrList)
+{
+ assert(xAttrList);
+ mpSerializer->startFastElement(elementTokenId, xAttrList.get());
+}
+
+// Writes a single element whose attributes are taken from xAttrList, which
+// is asserted to be non-null.
+void FastSerializerHelper::singleElement(sal_Int32 elementTokenId, const rtl::Reference<FastAttributeList>& xAttrList)
+{
+ assert(xAttrList);
+ mpSerializer->singleFastElement(elementTokenId, xAttrList.get());
+}
+
+// Writes a C string unescaped, passing -1 as the length to the serializer.
+// Returns this.
+FastSerializerHelper* FastSerializerHelper::write(const char* value)
+{
+ mpSerializer->write(value, -1);
+ return this;
+}
+
+// Writes an OString unescaped (the serializer's bEscape default is false).
+// Returns this.
+FastSerializerHelper* FastSerializerHelper::write(const OString& value)
+{
+ mpSerializer->write(value);
+ return this;
+}
+
+// Writes a UTF-16 string view unescaped (the serializer's bEscape default is
+// false). Returns this.
+FastSerializerHelper* FastSerializerHelper::write(std::u16string_view value)
+{
+ mpSerializer->write(value);
+ return this;
+}
+
+// Writes the text produced by OString::number(value). Returns this.
+FastSerializerHelper* FastSerializerHelper::write(sal_Int32 value)
+{
+ mpSerializer->write(OString::number(value));
+ return this;
+}
+
+// Writes the text produced by OString::number(value). Returns this.
+FastSerializerHelper* FastSerializerHelper::write(sal_Int64 value)
+{
+ mpSerializer->write(OString::number(value));
+ return this;
+}
+
+// Writes a double through the serializer's own write(double) overload.
+// Returns this.
+FastSerializerHelper* FastSerializerHelper::write(double value)
+{
+ mpSerializer->write(value);
+ return this;
+}
+
+// Writes a C string with escaping enabled, passing -1 as the length to the
+// serializer. Returns this.
+FastSerializerHelper* FastSerializerHelper::writeEscaped(const char* value)
+{
+ mpSerializer->write(value, -1, true);
+ return this;
+}
+
+// Writes a UTF-16 string view with escaping enabled; an empty view is
+// skipped without calling the serializer. Returns this.
+FastSerializerHelper* FastSerializerHelper::writeEscaped(std::u16string_view value)
+{
+ if (!value.empty())
+ mpSerializer->write(value, true);
+ return this;
+}
+
+// Writes the identifier text of tokenId. Returns this.
+FastSerializerHelper* FastSerializerHelper::writeId(sal_Int32 tokenId)
+{
+ mpSerializer->writeId(tokenId);
+ return this;
+}
+
+// Returns the output stream reference of the underlying serializer.
+css::uno::Reference< css::io::XOutputStream > const & FastSerializerHelper::getOutputStream() const
+{
+ return mpSerializer->getOutputStream();
+}
+
+// Forwards to FastSaxSerializer::mark(): starts buffering output under nTag,
+// with rOrder passed on unchanged.
+void FastSerializerHelper::mark(
+ sal_Int32 const nTag, const Sequence<sal_Int32>& rOrder)
+{
+ mpSerializer->mark(nTag, rOrder);
+}
+
+// Forwards to FastSaxSerializer::mergeTopMarks() with the same tag and
+// merge type.
+void FastSerializerHelper::mergeTopMarks(
+ sal_Int32 const nTag, MergeMarks const eMergeType)
+{
+ mpSerializer->mergeTopMarks(nTag, eMergeType);
+}
+
+// Returns a new FastAttributeList constructed with a null argument.
+rtl::Reference<FastAttributeList> FastSerializerHelper::createAttrList()
+{
+ return new FastAttributeList( nullptr );
+}
+
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/test/makefile.mk b/sax/test/makefile.mk
new file mode 100644
index 000000000..e2ae1546c
--- /dev/null
+++ b/sax/test/makefile.mk
@@ -0,0 +1,58 @@
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+
+# Build description for two test programs (testcomponent, saxdemo).
+# NOTE(review): PRJNAME is "extensions" although this file lives under sax/ -
+# presumably copied from another module; confirm.
+PRJ=..
+
+PRJNAME=extensions
+TARGET=workben
+LIBTARGET=NO
+
+TARGETTYPE=CUI
+ENABLE_EXCEPTIONS=TRUE
+
+# --- Settings -----------------------------------------------------
+
+.INCLUDE : settings.mk
+# --- Files --------------------------------------------------------
+
+# The comparison holds when removing "DESKTOP" from BUILD_TYPE changes
+# nothing, i.e. when BUILD_TYPE does not contain DESKTOP; in that case the
+# ALL target is declared without any commands.
+.IF "$(BUILD_TYPE)" == "$(BUILD_TYPE:s/DESKTOP//)"
+
+ALL:
+# nothing
+
+.ENDIF
+
+#
+# std testcomponent
+#
+APP1TARGET = testcomponent
+APP2TARGET = saxdemo
+
+# Both programs are built from one object file each and list the same three
+# libraries.
+APP1OBJS = $(OBJ)$/testcomponent.obj
+APP1STDLIBS = $(SALLIB) \
+ $(CPPULIB)\
+ $(CPPUHELPERLIB)
+
+APP2OBJS = $(OBJ)$/saxdemo.obj
+APP2STDLIBS = $(SALLIB) \
+ $(CPPULIB) \
+ $(CPPUHELPERLIB)
+
+# --- Targets ------------------------------------------------------
+
+.INCLUDE : target.mk
diff --git a/sax/test/sax/exports.dxp b/sax/test/sax/exports.dxp
new file mode 100644
index 000000000..86214860d
--- /dev/null
+++ b/sax/test/sax/exports.dxp
@@ -0,0 +1,2 @@
+component_getFactory
+component_writeInfo
diff --git a/sax/test/sax/factory.hxx b/sax/test/sax/factory.hxx
new file mode 100644
index 000000000..688691c9c
--- /dev/null
+++ b/sax/test/sax/factory.hxx
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_TEST_SAX_FACTORY_HXX
+#define INCLUDED_SAX_TEST_SAX_FACTORY_HXX
+
+#include <rtl/strbuf.hxx>
+
+// Entry points of the SAX writer test. They are only declared here; the
+// component entry points in testsax.cxx (component_writeInfo and
+// component_getFactory) call them.
+//
+// This header includes nothing but <rtl/strbuf.hxx> and uses Reference,
+// XInterface, XMultiServiceFactory, Sequence, OUString and Exception
+// unqualified, so the including file has to make those names visible first.
+namespace sax_test {
+// Creation function in the form expected by createSingleFactory().
+Reference< XInterface > SAL_CALL OSaxWriterTest_CreateInstance(
+ const Reference< XMultiServiceFactory > & rSMgr ) throw ( Exception );
+// Service name of the writer test.
+OUString OSaxWriterTest_getServiceName( ) throw();
+// Implementation name of the writer test.
+OUString OSaxWriterTest_getImplementationName( ) throw();
+// Service names passed to createSingleFactory() for the writer test.
+Sequence<OUString> OSaxWriterTest_getSupportedServiceNames( ) throw();
+}
+// BUILD_ERROR(expr, Message): records one error entry unconditionally.
+//
+// Must be expanded inside a member function of a class that has the members
+// m_seqErrors and m_seqExceptions. Both sequences grow by one element; the
+// new m_seqErrors element receives the text
+//     <__FILE__> (<__LINE__>)\n[ <expr as written> ] : <Message>
+// converted from ASCII. The new m_seqExceptions element is not assigned by
+// this macro. `expr` is only stringized, never evaluated. The trailing
+// ((void)0) makes the invocation require a terminating semicolon.
+#define BUILD_ERROR(expr, Message)\
+ {\
+ m_seqErrors.realloc( m_seqErrors.getLength() + 1 ); \
+ m_seqExceptions.realloc( m_seqExceptions.getLength() + 1 ); \
+ OStringBuffer str(128); \
+ str.append( __FILE__ );\
+ str.append( " " ); \
+ str.append( "(" ); \
+ str.append( OString::valueOf( (sal_Int32)__LINE__) );\
+ str.append(")\n" );\
+ str.append( "[ " ); \
+ str.append( #expr ); \
+ str.append( " ] : " ); \
+ str.append( Message ); \
+ m_seqErrors.getArray()[ m_seqErrors.getLength()-1] =\
+ OStringToOUString( str.makeStringAndClear() , RTL_TEXTENCODING_ASCII_US ); \
+ }\
+ ((void)0)
+
+
+// WARNING_ASSERT(expr, Message): if `expr` is false, appends one entry of the
+// form
+//     <__FILE__> (<__LINE__>)\n[ <expr as written> ] : <Message>
+// to m_seqWarnings and returns from the enclosing function (which therefore
+// has to return void). Must be expanded inside a member function of a class
+// that has the member m_seqWarnings.
+//
+// The warning sequence is grown from its OWN length. Sizing it from
+// m_seqErrors (as was done before) either dropped already recorded warnings
+// or left empty entries, depending on how many errors had been collected.
+#define WARNING_ASSERT(expr, Message) \
+    if( ! (expr) ) { \
+        m_seqWarnings.realloc( m_seqWarnings.getLength() + 1 ); \
+        OStringBuffer str(128);\
+        str.append( __FILE__);\
+        str.append( " "); \
+        str.append( "(" ); \
+        str.append(OString::valueOf( (sal_Int32)__LINE__)) ;\
+        str.append( ")\n");\
+        str.append( "[ " ); \
+        str.append( #expr ); \
+        str.append( " ] : ") ; \
+        str.append( Message); \
+        m_seqWarnings.getArray()[ m_seqWarnings.getLength()-1] =\
+            OStringToOUString( str.makeStringAndClear() , RTL_TEXTENCODING_ASCII_US ); \
+        return; \
+    }\
+    ((void)0)
+
+// ERROR_ASSERT(expr, Message): if `expr` is false, records an error through
+// BUILD_ERROR and returns from the enclosing function, which therefore has
+// to return void. The trailing ((void)0) makes the invocation require a
+// terminating semicolon.
+#define ERROR_ASSERT(expr, Message) \
+ if( ! (expr) ) { \
+ BUILD_ERROR(expr, Message );\
+ return; \
+ }\
+ ((void)0)
+
+#endif // INCLUDED_SAX_TEST_SAX_FACTORY_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/test/sax/makefile.mk b/sax/test/sax/makefile.mk
new file mode 100644
index 000000000..9aa5864ca
--- /dev/null
+++ b/sax/test/sax/makefile.mk
@@ -0,0 +1,52 @@
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# This file incorporates work covered by the following license notice:
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to you under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.apache.org/licenses/LICENSE-2.0 .
+#
+# dmake description of the shared library "testsax", built from
+# testsax.obj and testwriter.obj (see SLOFILES / SHL1TARGET below).
+PRJ=..$/..
+
+# NOTE(review): PRJNAME says "extensions" although this file lives under
+# sax/test/sax -- presumably inherited from another module; confirm.
+PRJNAME=extensions
+TARGET=testsax
+USE_DEFFILE=TRUE
+ENABLE_EXCEPTIONS=TRUE
+# --- Settings -----------------------------------------------------
+.INCLUDE : settings.mk
+
+# --- Files --------------------------------------------------------
+
+
+# Object files that make up the library.
+SLOFILES = $(SLO)$/testsax.obj \
+ $(SLO)$/testwriter.obj
+
+# The library and its import library are both named after $(TARGET).
+SHL1TARGET= $(TARGET)
+SHL1IMPLIB= i$(TARGET)
+
+SHL1STDLIBS= \
+ $(SALLIB) \
+ $(CPPULIB) \
+ $(CPPUHELPERLIB)
+
+
+SHL1LIBS= $(SLB)$/$(TARGET).lib
+SHL1DEPN= makefile.mk $(SHL1LIBS)
+SHL1DEF= $(MISC)$/$(SHL1TARGET).def
+
+# Exported symbols are listed in exports.dxp.
+DEF1NAME= $(SHL1TARGET)
+DEF1EXPORTFILE= exports.dxp
+
+
+# --- Targets ------------------------------------------------------
+
+.INCLUDE : target.mk
diff --git a/sax/test/sax/testsax.cxx b/sax/test/sax/testsax.cxx
new file mode 100644
index 000000000..882a7a96d
--- /dev/null
+++ b/sax/test/sax/testsax.cxx
@@ -0,0 +1,795 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <osl/time.h>
+#include <osl/diagnose.h>
+
+#include <com/sun/star/test/XSimpleTest.hpp>
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+#include <com/sun/star/xml/sax/XParser.hpp>
+#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
+
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/implbase.hxx>
+
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::test;
+using namespace ::com::sun::star::registry;
+using namespace ::com::sun::star::io;
+using namespace ::com::sun::star::xml::sax;
+
+// test scenarios
+
+namespace sax_test {
+
+// XSimpleTest implementation that exercises an XParser object.
+// test() dispatches on a numeric handle to the private test* scenarios;
+// problems are collected in m_seqErrors / m_seqWarnings through the
+// BUILD_ERROR / ERROR_ASSERT macros.
+class OSaxParserTest : public WeakImplHelper< XSimpleTest >
+{
+public:
+ // rFactory: service manager, kept for creating helper services later.
+ explicit OSaxParserTest( const Reference < XMultiServiceFactory > & rFactory ) : m_rFactory( rFactory )
+ {
+ }
+
+ // Records an error if TestObject cannot be queried for XParser.
+ virtual void SAL_CALL testInvariant(
+ const OUString& TestName,
+ const Reference < XInterface >& TestObject)
+ throw ( IllegalArgumentException, RuntimeException);
+
+ // Runs the scenario selected by hTestHandle and returns the next handle,
+ // or -1 once all scenarios have been run.
+ virtual sal_Int32 SAL_CALL test(
+ const OUString& TestName,
+ const Reference < XInterface >& TestObject,
+ sal_Int32 hTestHandle)
+ throw ( IllegalArgumentException,RuntimeException);
+
+ // True as long as no error has been recorded.
+ virtual sal_Bool SAL_CALL testPassed() throw (RuntimeException);
+ virtual Sequence< OUString > SAL_CALL getErrors() throw (RuntimeException);
+ virtual Sequence< Any > SAL_CALL getErrorExceptions() throw (RuntimeException);
+ virtual Sequence< OUString > SAL_CALL getWarnings() throw (RuntimeException);
+
+private:
+ // Individual scenarios, called from test().
+ void testSimple( const Reference < XParser > &r );
+ void testNamespaces( const Reference < XParser > &r );
+ void testFile( const Reference < XParser > &r );
+ void testEncoding( const Reference < XParser > &rParser );
+ void testPerformance( const Reference < XParser > &rParser );
+
+ // Grown by BUILD_ERROR in step with m_seqErrors.
+ Sequence<Any> m_seqExceptions;
+ // One formatted message per recorded error.
+ Sequence<OUString> m_seqErrors;
+ // Returned by getWarnings().
+ Sequence<OUString> m_seqWarnings;
+ // Service manager handed in at construction.
+ Reference < XMultiServiceFactory > m_rFactory;
+};
+
+/// Creates a new OSaxParserTest on top of the given service manager and
+/// returns it as XInterface. Passed to createSingleFactory() by
+/// component_getFactory().
+/// @note for external binding
+Reference < XInterface > SAL_CALL OSaxParserTest_CreateInstance( const Reference < XMultiServiceFactory > & rSMgr ) throw(Exception)
+{
+    OWeakObject * const pAsWeakObject =
+        static_cast< OWeakObject * >( new OSaxParserTest( rSMgr ) );
+    Reference < XInterface > xInstance( pAsWeakObject );
+    return xInstance;
+}
+
+/// Service name of the parser test; component_writeInfo() creates a registry
+/// key with this name below the implementation's UNO/SERVICES key.
+OUString OSaxParserTest_getServiceName( ) throw ()
+{
+    OUString aServiceName( "test.com.sun.star.xml.sax.Parser" );
+    return aServiceName;
+}
+
+/// Implementation name of the parser test; component_getFactory() compares
+/// the requested implementation name against it.
+OUString OSaxParserTest_getImplementationName( ) throw ()
+{
+    OUString aImplementationName( "test.extensions.xml.sax.Parser" );
+    return aImplementationName;
+}
+
+/// Returns the one-element list handed to createSingleFactory().
+/// NOTE(review): the element is the implementation name, not
+/// OSaxParserTest_getServiceName() -- confirm that this is intended.
+Sequence<OUString> OSaxParserTest_getSupportedServiceNames( ) throw ()
+{
+    return Sequence<OUString>{ OSaxParserTest_getImplementationName() };
+}
+
+/**
+ * Invariant check for the tested object.
+ *
+ * For the test name "com.sun.star.xml.sax.Parser" the object must be
+ * queryable for XParser; otherwise an error is recorded via ERROR_ASSERT.
+ * Any other test name is ignored.
+ *
+ * @param TestName    name of the service under test
+ * @param TestObject  object to check
+ */
+void OSaxParserTest::testInvariant(
+    const OUString& TestName,
+    const Reference < XInterface >& TestObject )
+    throw ( IllegalArgumentException, RuntimeException)
+{
+    if( OUString( "com.sun.star.xml.sax.Parser") == TestName ) {
+        Reference < XParser > parser( TestObject , UNO_QUERY );
+
+        // The message names the interface that is actually queried here
+        // (it used to mention XDataInputStream).
+        ERROR_ASSERT( parser.is() , "XParser cannot be queried" );
+    }
+}
+
+/**
+ * Runs one test step and reports which step comes next.
+ *
+ * Handle 0 runs testInvariant(); handles 1..5 run testSimple(),
+ * testNamespaces(), testEncoding(), testFile() and testPerformance().
+ * Exceptions escaping a step are turned into recorded errors.
+ *
+ * @param TestName     must be "com.sun.star.xml.sax.Parser", otherwise an
+ *                     error is recorded and the handle is returned unchanged
+ * @param TestObject   object to test
+ * @param hTestHandle  step to run
+ * @return hTestHandle + 1, or -1 when that value reaches 6
+ */
+sal_Int32 OSaxParserTest::test(
+    const OUString& TestName,
+    const Reference < XInterface >& TestObject,
+    sal_Int32 hTestHandle)
+    throw ( IllegalArgumentException, RuntimeException)
+{
+    const bool bSupported = ( OUString( "com.sun.star.xml.sax.Parser") == TestName );
+    if( ! bSupported ) {
+        BUILD_ERROR( 0 , "service not supported by test." );
+        return hTestHandle;
+    }
+
+    try
+    {
+        if( 0 == hTestHandle ) {
+            testInvariant( TestName , TestObject );
+        }
+        else {
+            Reference < XParser > parser( TestObject , UNO_QUERY );
+
+            switch( hTestHandle ) {
+                case 1: testSimple( parser );      break;
+                case 2: testNamespaces( parser );  break;
+                case 3: testEncoding( parser );    break;
+                case 4: testFile( parser );        break;
+                case 5: testPerformance( parser ); break;
+                default:                           break;
+            }
+        }
+    }
+    catch( Exception & e )
+    {
+        OString o = OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US);
+        BUILD_ERROR( 0 , o.getStr() );
+    }
+    catch( ... )
+    {
+        BUILD_ERROR( 0 , "unknown exception (Exception is not base class)" );
+    }
+
+    ++hTestHandle;
+
+    // all tests finished once the handle runs past the last step
+    if( hTestHandle >= 6 ) {
+        return -1;
+    }
+    return hTestHandle;
+}
+
+sal_Bool OSaxParserTest::testPassed() throw (RuntimeException)
+{
+ return m_seqErrors.getLength() == 0;
+}
+
+// Returns the error messages recorded so far.
+Sequence< OUString > OSaxParserTest::getErrors() throw (RuntimeException)
+{
+ return m_seqErrors;
+}
+
+// Returns m_seqExceptions, which BUILD_ERROR grows in step with the errors.
+Sequence< Any > OSaxParserTest::getErrorExceptions() throw (RuntimeException)
+{
+ return m_seqExceptions;
+}
+
+// Returns the warning messages recorded so far.
+Sequence< OUString > OSaxParserTest::getWarnings() throw (RuntimeException)
+{
+ return m_seqWarnings;
+}
+
+/**
+ * Wraps a byte sequence into an input stream.
+ *
+ * A "com.sun.star.io.Pipe" service is instantiated, the bytes are written
+ * into its output side, the output is flushed and closed, and the input
+ * side of the same object is returned.
+ *
+ * @param seqBytes  bytes to make readable
+ * @param xSMgr     service manager used to create the pipe
+ * @return input side of the filled pipe
+ */
+Reference < XInputStream > createStreamFromSequence(
+    const Sequence<sal_Int8> seqBytes ,
+    const Reference < XMultiServiceFactory > &xSMgr )
+{
+    Reference < XInterface > xPipe = xSMgr->createInstance("com.sun.star.io.Pipe");
+    OSL_ASSERT( xPipe.is() );
+
+    // both ends are interfaces of the same pipe object
+    Reference< XOutputStream > xWriteEnd( xPipe , UNO_QUERY );
+    OSL_ASSERT( xWriteEnd.is() );
+    Reference< XInputStream > xReadEnd( xPipe , UNO_QUERY );
+    OSL_ASSERT( xReadEnd.is() );
+
+    xWriteEnd->writeBytes( seqBytes );
+    xWriteEnd->flush();
+    xWriteEnd->closeOutput();
+
+    return xReadEnd;
+}
+
+/**
+ * Reads a whole file into memory and exposes it as an input stream.
+ *
+ * @param pcFile  path of the file to read (opened in binary mode)
+ * @param xSMgr   service manager, forwarded to createStreamFromSequence()
+ * @return stream over the file content; an empty reference if the file
+ *         cannot be opened, its size cannot be determined, it does not fit
+ *         into a Sequence, or it cannot be read completely
+ */
+Reference< XInputStream > createStreamFromFile(
+    const char *pcFile ,
+    const Reference < XMultiServiceFactory > &xSMgr )
+{
+    Reference< XInputStream > r;
+
+    FILE *f = fopen( pcFile , "rb" );
+    if( ! f ) {
+        return r;
+    }
+
+    // ftell() returns long and reports failure as -1; keep the full width
+    // and check it instead of feeding it straight into the Sequence size.
+    long nLength = -1;
+    if( 0 == fseek( f , 0 , SEEK_END ) ) {
+        nLength = ftell( f );
+    }
+
+    if( nLength >= 0 && nLength <= SAL_MAX_INT32 && 0 == fseek( f , 0 , SEEK_SET ) ) {
+        Sequence<sal_Int8> seqIn( static_cast< sal_Int32 >( nLength ) );
+
+        // A short read would leave part of the buffer unfilled, so only a
+        // complete read is turned into a stream.
+        const size_t nWanted = static_cast< size_t >( nLength );
+        const size_t nRead = ( nWanted != 0 )
+            ? fread( seqIn.getArray() , 1 , nWanted , f )
+            : 0;
+
+        if( nRead == nWanted ) {
+            r = createStreamFromSequence( seqIn , xSMgr );
+        }
+    }
+
+    fclose( f );
+    return r;
+}
+
+/**
+ * Document handler, entity resolver and error handler used by the parser
+ * tests.
+ *
+ * It counts elements, attributes, characters and ignorable whitespace of the
+ * parsed document (the counters are public so the tests can inspect them)
+ * and, when constructed with bPrint == true, also dumps every event to
+ * stdout, indented by the current element depth.
+ */
+class TestDocumentHandler :
+    public WeakImplHelper< XExtendedDocumentHandler , XEntityResolver , XErrorHandler >
+{
+public:
+    /**
+     * @param r       service manager, used by resolveEntity() to build streams
+     * @param bPrint  whether events are printed to stdout
+     */
+    TestDocumentHandler( const Reference < XMultiServiceFactory > &r , sal_Bool bPrint )
+        // Counters start at zero so they are well defined even if
+        // startDocument() is never called.
+        : m_iLevel(0), m_iElementCount(0), m_iAttributeCount(0),
+          m_iWhitespaceCount(0), m_iCharCount(0),
+          m_bPrint(bPrint), m_xSMgr(r)
+    {
+    }
+
+    // Error handler
+
+    /// Prints a note and rethrows the problem wrapped in a SAXException.
+    virtual void SAL_CALL error(const Any& aSAXParseException) throw (SAXException, RuntimeException)
+    {
+        printf( "Error !\n" );
+        throw SAXException(
+            OUString( "error from error handler") ,
+            Reference < XInterface >() ,
+            aSAXParseException );
+    }
+    /// Only prints a note; the exception itself is not inspected.
+    virtual void SAL_CALL fatalError(const Any& aSAXParseException) throw (SAXException, RuntimeException)
+    {
+        printf( "Fatal Error !\n" );
+    }
+    /// Only prints a note; the exception itself is not inspected.
+    virtual void SAL_CALL warning(const Any& aSAXParseException) throw (SAXException, RuntimeException)
+    {
+        printf( "Warning !\n" );
+    }
+
+    // ExtendedDocumentHandler
+
+    /// Resets nesting level and all counters for a new document.
+    virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException)
+    {
+        m_iLevel = 0;
+        m_iElementCount = 0;
+        m_iAttributeCount = 0;
+        m_iWhitespaceCount =0;
+        m_iCharCount=0;
+        if( m_bPrint ) {
+            printf( "document started\n" );
+        }
+    }
+    /// Prints the collected counters when printing is enabled.
+    virtual void SAL_CALL endDocument() throw (SAXException, RuntimeException)
+    {
+        if( m_bPrint ) {
+            printf( "document finished\n" );
+            printf( "(ElementCount %d),(AttributeCount %d),(WhitespaceCount %d),(CharCount %d)\n",
+                    m_iElementCount, m_iAttributeCount, m_iWhitespaceCount , m_iCharCount );
+        }
+    }
+    /// Counts the element and its attributes; optionally prints location,
+    /// element name and all attributes as (name,type,'value').
+    virtual void SAL_CALL startElement(const OUString& aName,
+                                       const Reference< XAttributeList > & xAttribs)
+        throw (SAXException,RuntimeException)
+    {
+        if( m_rLocator.is() ) {
+            if( m_bPrint )
+            {
+                OString o = OUStringToOString( m_rLocator->getSystemId() , RTL_TEXTENCODING_UTF8 );
+                printf( "%s(%d):" , o.getStr() , m_rLocator->getLineNumber() );
+            }
+        }
+        if( m_bPrint ) {
+            int i;
+            for( i = 0; i < m_iLevel ; i ++ ) {
+                printf( " " );
+            }
+            OString o = OUStringToOString(aName , RTL_TEXTENCODING_UTF8 );
+            // Print the converted 8-bit string: aName.getStr() yields UTF-16
+            // code units, which "%s" cannot consume.
+            printf( "<%s> " , o.getStr() );
+
+            for( i = 0 ; i < xAttribs->getLength() ; i ++ )
+            {
+                OString o1 = OUStringToOString(xAttribs->getNameByIndex( i ), RTL_TEXTENCODING_UTF8 );
+                OString o2 = OUStringToOString(xAttribs->getTypeByIndex( i ), RTL_TEXTENCODING_UTF8 );
+                OString o3 = OUStringToOString(xAttribs->getValueByIndex( i ) , RTL_TEXTENCODING_UTF8 );
+                printf( "(%s,%s,'%s')" , o1.getStr(), o2.getStr(), o3.getStr() );
+            }
+            printf( "\n" );
+        }
+        m_iLevel ++;
+        m_iElementCount ++;
+        m_iAttributeCount += xAttribs->getLength();
+    }
+
+    /// Leaves one nesting level; optionally prints the closing tag.
+    virtual void SAL_CALL endElement(const OUString& aName) throw (SAXException,RuntimeException)
+    {
+        OSL_ASSERT( m_iLevel );
+        m_iLevel --;
+        if( m_bPrint ) {
+            int i;
+            for( i = 0; i < m_iLevel ; i ++ ) {
+                printf( " " );
+            }
+            OString o = OUStringToOString(aName , RTL_TEXTENCODING_UTF8 );
+            printf( "</%s>\n" , o.getStr() );
+        }
+    }
+
+    /// Adds the length of the text to the character counter.
+    virtual void SAL_CALL characters(const OUString& aChars) throw (SAXException,RuntimeException)
+    {
+        if( m_bPrint ) {
+            int i;
+            for( i = 0; i < m_iLevel ; i ++ ) {
+                printf( " " );
+            }
+            OString o = OUStringToOString(aChars , RTL_TEXTENCODING_UTF8 );
+            printf( "%s\n" , o.getStr() );
+        }
+        m_iCharCount += aChars.getLength();
+    }
+
+    /// Adds the length of the whitespace run to the whitespace counter.
+    virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) throw (SAXException,RuntimeException)
+    {
+        m_iWhitespaceCount += aWhitespaces.getLength();
+    }
+
+    virtual void SAL_CALL processingInstruction(const OUString& aTarget, const OUString& aData) throw (SAXException,RuntimeException)
+    {
+        if( m_bPrint )
+        {
+            OString o1 = OUStringToOString(aTarget, RTL_TEXTENCODING_UTF8 );
+            OString o2 = OUStringToOString(aData, RTL_TEXTENCODING_UTF8 );
+            printf( "PI : %s,%s\n" , o1.getStr() , o2.getStr() );
+        }
+    }
+
+    /// Remembers the locator so startElement() can print positions.
+    virtual void SAL_CALL setDocumentLocator(const Reference< XLocator> & xLocator)
+        throw (SAXException,RuntimeException)
+    {
+        m_rLocator = xLocator;
+    }
+
+    /// Resolves an external entity by treating its system id as a file
+    /// name; the stream is empty if that file cannot be read.
+    virtual InputSource SAL_CALL resolveEntity(
+        const OUString& sPublicId,
+        const OUString& sSystemId)
+        throw (SAXException,RuntimeException)
+    {
+        InputSource source;
+        source.sSystemId = sSystemId;
+        source.sPublicId = sPublicId;
+
+        // createStreamFromFile() takes a C string; the temporary OString
+        // stays alive until the call has returned.
+        source.aInputStream = createStreamFromFile(
+            OUStringToOString( sSystemId , RTL_TEXTENCODING_ASCII_US).getStr() , m_xSMgr );
+
+        return source;
+    }
+
+    virtual void SAL_CALL startCDATA() throw (SAXException,RuntimeException)
+    {
+        if( m_bPrint ) {
+            printf( "CDataStart :\n" );
+        }
+    }
+    virtual void SAL_CALL endCDATA() throw (SAXException,RuntimeException)
+    {
+        if( m_bPrint ) {
+            printf( "CEndStart :\n" );
+        }
+    }
+    virtual void SAL_CALL comment(const OUString& sComment) throw (SAXException,RuntimeException)
+    {
+        if( m_bPrint ) {
+            OString o1 = OUStringToOString(sComment, RTL_TEXTENCODING_UTF8 );
+            printf( "<!--%s-->\n" , o1.getStr() );
+        }
+    }
+    virtual void SAL_CALL unknown(const OUString& sString) throw (SAXException,RuntimeException)
+    {
+        if( m_bPrint )
+        {
+            OString o1 = OUStringToOString(sString, RTL_TEXTENCODING_UTF8 );
+            printf( "UNKNOWN : {%s}\n" , o1.getStr() );
+        }
+    }
+
+    /// Intentionally does nothing.
+    virtual void SAL_CALL allowLineBreak() throw (SAXException, RuntimeException )
+    {
+
+    }
+
+    int m_iLevel;            ///< current element nesting depth
+    int m_iElementCount;     ///< number of startElement() calls
+    int m_iAttributeCount;   ///< sum of attribute counts of all elements
+    int m_iWhitespaceCount;  ///< total length of ignorable whitespace
+    int m_iCharCount;        ///< total length of character data
+    sal_Bool m_bPrint;       ///< print events to stdout
+
+    Reference < XMultiServiceFactory > m_xSMgr;
+    Reference < XLocator > m_rLocator;
+};
+
+/**
+ * Parses a small in-memory document with a DOCTYPE, an internal entity,
+ * processing instructions, a CDATA section and a comment, then checks the
+ * element, attribute, character and whitespace counts seen by the handler.
+ * Exceptions thrown by the parser are recorded as errors.
+ */
+void OSaxParserTest::testSimple( const Reference < XParser > &rParser )
+{
+    char TestString[] = "<!DOCTYPE personnel [\n"
+                        "<!ENTITY testInternal \"internal Test!\">\n"
+                        "<!ENTITY test SYSTEM \"external_entity.xml\">\n"
+                        "]>\n"
+                        "<personnel>\n"
+                        "<person> fjklsfdklsdfkl\n"
+                        "fjklsfdklsdfkl\n"
+                        "<?testpi pidata?>\n"
+                        "&testInternal;\n"
+                        "<HUHU x='5' y='kjfd'> blahuhu\n"
+                        "<HI> blahi\n"
+                        " <![CDATA[<greeting>Hello, '+1+12world!</greeting>]]>\n"
+                        " <!-- huhu <jdk> -->\n"
+                        "<?testpi pidata?>\n"
+                        "</HI>\n"
+                        "aus XMLTest\n"
+                        "</HUHU>\n"
+                        "</person>\n"
+                        "</personnel>\n\n\n";
+
+    // copy the document text (without the terminating NUL) into a sequence
+    const size_t nTextLen = strlen( TestString );
+    Sequence< sal_Int8> seqBytes( nTextLen );
+    memcpy( seqBytes.getArray() , TestString , nTextLen );
+
+    Reference< XInputStream > rInStream = createStreamFromSequence( seqBytes , m_rFactory );
+    OUString sInput;
+    sInput = "internal";
+
+    if( ! rParser.is() ) {
+        return;
+    }
+
+    InputSource source;
+    source.aInputStream = rInStream;
+    source.sSystemId = sInput;
+
+    // the references below keep the handler alive; the raw pointer is only
+    // used to read the counters
+    TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False );
+    Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY );
+    Reference< XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY );
+
+    rParser->setDocumentHandler( rDocHandler );
+    rParser->setEntityResolver( rEntityResolver );
+
+    try
+    {
+        rParser->parseStream( source );
+        ERROR_ASSERT( pDocHandler->m_iElementCount == 4 , "wrong element count" );
+        ERROR_ASSERT( pDocHandler->m_iAttributeCount == 2 , "wrong attribute count" );
+        ERROR_ASSERT( pDocHandler->m_iCharCount == 130 , "wrong char count" );
+        ERROR_ASSERT( pDocHandler->m_iWhitespaceCount == 0, "wrong whitespace count" );
+    }
+    catch( Exception & e )
+    {
+        // SAXParseException and SAXException end up here as well; all of
+        // them are reported the same way
+        OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 );
+        BUILD_ERROR( 1 , o1.getStr() );
+    }
+    catch( ... )
+    {
+        BUILD_ERROR( 1 , "unknown exception" );
+    }
+}
+
+/**
+ * Parses a small in-memory document whose elements carry the "html:"
+ * prefix and checks the element, attribute, character and whitespace
+ * counts seen by the handler. Exceptions are recorded as errors.
+ */
+void OSaxParserTest::testNamespaces( const Reference < XParser > &rParser )
+{
+    char TestString[] =
+        "<?xml version='1.0'?>\n"
+        "<!-- all elements here are explicitly in the HTML namespace -->\n"
+        "<html:html xmlns:html='http://www.w3.org/TR/REC-html40'>\n"
+        "<html:head><html:title>Frobnostication</html:title></html:head>\n"
+        "<html:body><html:p>Moved to \n"
+        "<html:a href='http://frob.com'>here.</html:a></html:p></html:body>\n"
+        "</html:html>\n";
+
+    // copy the document text (without the terminating NUL) into a sequence
+    const size_t nTextLen = strlen( TestString );
+    Sequence<sal_Int8> seqBytes( nTextLen );
+    memcpy( seqBytes.getArray() , TestString , nTextLen );
+
+    Reference< XInputStream > rInStream = createStreamFromSequence( seqBytes , m_rFactory );
+    OUString sInput;
+    sInput = "internal";
+
+    if( ! rParser.is() ) {
+        return;
+    }
+
+    InputSource source;
+    source.aInputStream = rInStream;
+    source.sSystemId = sInput;
+
+    // the references below keep the handler alive; the raw pointer is only
+    // used to read the counters
+    TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False );
+    Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY );
+    Reference< XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY );
+
+    rParser->setDocumentHandler( rDocHandler );
+    rParser->setEntityResolver( rEntityResolver );
+
+    try
+    {
+        rParser->parseStream( source );
+        ERROR_ASSERT( pDocHandler->m_iElementCount == 6 , "wrong element count" );
+        ERROR_ASSERT( pDocHandler->m_iAttributeCount == 2 , "wrong attribute count" );
+        ERROR_ASSERT( pDocHandler->m_iCharCount == 33, "wrong char count" );
+        ERROR_ASSERT( pDocHandler->m_iWhitespaceCount == 0 , "wrong whitespace count" );
+    }
+    catch( Exception & e )
+    {
+        OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 );
+        BUILD_ERROR( 1 , o1.getStr() );
+    }
+    catch( ... )
+    {
+        BUILD_ERROR( 1 , "unknown exception" );
+    }
+}
+
+/**
+ * Parses an in-memory document that declares encoding "iso-8859-1" and
+ * contains the byte \337. Only successful parsing is checked; exceptions
+ * are recorded as errors.
+ */
+void OSaxParserTest::testEncoding( const Reference < XParser > &rParser )
+{
+    char TestString[] =
+        "<?xml version='1.0' encoding=\"iso-8859-1\"?>\n"
+        "<!-- all elements here are explicitly in the HTML namespace -->\n"
+        "<html:html xmlns:html='http://www.w3.org/TR/REC-html40'>\n"
+        "<html:head><html:title>Frobnostication</html:title></html:head>\n"
+        "<html:body><html:p>Moved to \337\n"
+        "<html:a href='http://frob.com'>here.</html:a></html:p></html:body>\n"
+        "</html:html>\n";
+
+    // copy the document text (without the terminating NUL) into a sequence
+    const size_t nTextLen = strlen( TestString );
+    Sequence<sal_Int8> seqBytes( nTextLen );
+    memcpy( seqBytes.getArray() , TestString , nTextLen );
+
+    Reference< XInputStream > rInStream = createStreamFromSequence( seqBytes , m_rFactory );
+    OUString sInput;
+    sInput = "internal";
+
+    if( ! rParser.is() ) {
+        return;
+    }
+
+    InputSource source;
+    source.aInputStream = rInStream;
+    source.sSystemId = sInput;
+
+    TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False );
+    Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY );
+    Reference< XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY );
+
+    rParser->setDocumentHandler( rDocHandler );
+    rParser->setEntityResolver( rEntityResolver );
+
+    try
+    {
+        rParser->parseStream( source );
+    }
+    catch( Exception & e )
+    {
+        OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 );
+        BUILD_ERROR( 1 , o1.getStr() );
+    }
+    catch ( ... )
+    {
+        BUILD_ERROR( 1 , "unknown exception" );
+    }
+}
+
+/**
+ * Parses the file "testsax.xml" with a printing document handler.
+ *
+ * Nothing happens if the parser reference is empty or the file cannot be
+ * turned into a stream. A SAXParseException is printed together with the
+ * chain of SAXParseExceptions wrapped inside it; a SAXException is recorded
+ * as an error; other exceptions are only printed.
+ */
+void OSaxParserTest::testFile( const Reference < XParser > & rParser )
+{
+
+    Reference< XInputStream > rInStream = createStreamFromFile( "testsax.xml" , m_rFactory );
+    OUString sInput = "testsax.xml";
+
+    if( rParser.is() && rInStream.is() ) {
+        InputSource source;
+
+        source.aInputStream = rInStream;
+        source.sSystemId = sInput;
+
+        TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_True );
+        Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY );
+        Reference < XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY );
+        Reference < XErrorHandler > rErrorHandler( ( XErrorHandler * )pDocHandler , UNO_QUERY );
+
+        rParser->setDocumentHandler( rDocHandler );
+        rParser->setEntityResolver( rEntityResolver );
+        rParser->setErrorHandler( rErrorHandler );
+
+        try
+        {
+            rParser->parseStream( source );
+        }
+        catch( SAXParseException & e ) {
+            Any any;
+            any <<= e;
+
+            // Walk the chain of wrapped SAXParseExceptions. Each one is
+            // extracted into a copy first: assigning `any` from a member of
+            // the value it currently holds would read from storage that the
+            // assignment itself releases.
+            while( any.getValueType() == cppu::UnoType<SAXParseException>::get() ) {
+                SAXParseException ex;
+                any >>= ex;
+                OString o1 = OUStringToOString(ex.Message, RTL_TEXTENCODING_UTF8 );
+                printf( "%s\n" , o1.getStr() );
+                any = ex.WrappedException;
+            }
+        }
+        catch( SAXException & e )
+        {
+            OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 );
+            BUILD_ERROR( 1 , o1.getStr() );
+
+        }
+        catch( Exception & e ) {
+            // "%s" needs a C string; an OUString object must not be passed
+            // through the variable argument list.
+            OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 );
+            printf( "normal exception ! %s\n", o1.getStr() );
+        }
+        catch ( ... )
+        {
+            printf( "any exception !!!!\n" );
+        }
+    }
+}
+
+/**
+ * Parses the file "testPerformance.xml" with a non-printing handler and
+ * prints the wall-clock time the parse took.
+ *
+ * Nothing happens if the parser reference is empty or the file cannot be
+ * turned into a stream. Exceptions are only printed, not recorded as
+ * errors.
+ */
+void OSaxParserTest::testPerformance( const Reference < XParser > & rParser )
+{
+    Reference < XInputStream > rInStream =
+        createStreamFromFile( "testPerformance.xml" , m_rFactory );
+    // NOTE(review): the system id is spelled in lower case while the file
+    // name above is not -- confirm whether that is intended.
+    OUString sInput = "testperformance.xml";
+
+    if( rParser.is() && rInStream.is() ) {
+        InputSource source;
+
+        source.aInputStream = rInStream;
+        source.sSystemId = sInput;
+
+        TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False );
+        Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY );
+        Reference < XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY );
+        Reference < XErrorHandler > rErrorHandler( ( XErrorHandler * )pDocHandler , UNO_QUERY );
+
+        rParser->setDocumentHandler( rDocHandler );
+        rParser->setEntityResolver( rEntityResolver );
+        rParser->setErrorHandler( rErrorHandler );
+
+        try
+        {
+            TimeValue aStartTime, aEndTime;
+            osl_getSystemTime( &aStartTime );
+            rParser->parseStream( source );
+            osl_getSystemTime( &aEndTime );
+
+            double fStart = (double)aStartTime.Seconds + ((double)aStartTime.Nanosec / 1000000000.0);
+            double fEnd = (double)aEndTime.Seconds + ((double)aEndTime.Nanosec / 1000000000.0);
+
+            printf( "Performance reading : %g s\n" , fEnd - fStart );
+
+        }
+        catch( SAXParseException &e ) {
+            Any any;
+            any <<= e;
+            // Walk the chain of wrapped SAXParseExceptions.
+            while( any.getValueType() == cppu::UnoType<SAXParseException>::get() ) {
+                SAXParseException ex;
+                any >>= ex;
+                OString o = OUStringToOString( ex.Message , RTL_TEXTENCODING_ASCII_US );
+                printf( "%s\n" , o.getStr() );
+                // WrappedException already is an Any: assign it instead of
+                // inserting an Any into an Any.
+                any = ex.WrappedException;
+            }
+        }
+        catch( SAXException &e ) {
+            OString o = OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US );
+            printf( "%s\n" , o.getStr() );
+
+        }
+        catch( ... )
+        {
+            printf( "any exception !!!!\n" );
+        }
+    }
+}
+} // namespace
+
+using namespace sax_test;
+
+extern "C"
+{
+
+/**
+ * Writes the registration data of both test implementations.
+ *
+ * For the parser test and the writer test a key
+ * "/<implementation name>/UNO/SERVICES" is created below pRegistryKey, with
+ * the service name as sub key.
+ *
+ * @param pServiceManager  not used
+ * @param pRegistryKey     XRegistryKey to write to; may be null
+ * @return sal_True on success, sal_False if no key was given or the
+ *         registry reported InvalidRegistryException
+ */
+sal_Bool SAL_CALL component_writeInfo(
+    void * pServiceManager, void * pRegistryKey )
+{
+    if( ! pRegistryKey ) {
+        return sal_False;
+    }
+
+    try
+    {
+        Reference< XRegistryKey > xKey(
+            reinterpret_cast< XRegistryKey * >( pRegistryKey ) );
+
+        // parser test
+        OUString aServicesPath =
+            OUString( "/" ) +
+            OSaxParserTest_getImplementationName() +
+            OUString( "/UNO/SERVICES" );
+        Reference< XRegistryKey > xServicesKey = xKey->createKey( aServicesPath );
+        xServicesKey->createKey( OSaxParserTest_getServiceName() );
+
+        // writer test
+        aServicesPath =
+            OUString( "/" ) +
+            OSaxWriterTest_getImplementationName() +
+            OUString( "/UNO/SERVICES" );
+        xServicesKey = xKey->createKey( aServicesPath );
+        xServicesKey->createKey( OSaxWriterTest_getServiceName() );
+
+        return sal_True;
+    }
+    catch (InvalidRegistryException &)
+    {
+        OSL_FAIL( "### InvalidRegistryException!" );
+    }
+    return sal_False;
+}
+
+/**
+ * Returns a single-service factory for the requested implementation.
+ *
+ * @param pImplName        implementation name to look up
+ * @param pServiceManager  XMultiServiceFactory; null yields a null result
+ * @param pRegistryKey     not used
+ * @return acquired XSingleServiceFactory pointer for the writer test or the
+ *         parser test, or null if the name matches neither
+ */
+SAL_DLLPUBLIC_EXPORT void * SAL_CALL component_getFactory(
+    const char * pImplName, void * pServiceManager, void * pRegistryKey )
+{
+    if( ! pServiceManager ) {
+        return nullptr;
+    }
+
+    Reference< XMultiServiceFactory > xSMgr =
+        reinterpret_cast< XMultiServiceFactory * > ( pServiceManager );
+    const OUString aImplementationName = OUString::createFromAscii( pImplName );
+
+    Reference< XSingleServiceFactory > xFactory;
+    if( aImplementationName == OSaxWriterTest_getImplementationName() )
+    {
+        xFactory = createSingleFactory( xSMgr, aImplementationName,
+                                        OSaxWriterTest_CreateInstance,
+                                        OSaxWriterTest_getSupportedServiceNames() );
+    }
+    else if( aImplementationName == OSaxParserTest_getImplementationName() )
+    {
+        xFactory = createSingleFactory( xSMgr, aImplementationName,
+                                        OSaxParserTest_CreateInstance,
+                                        OSaxParserTest_getSupportedServiceNames() );
+    }
+
+    if( ! xFactory.is() ) {
+        return nullptr;
+    }
+
+    // the caller receives its own reference through the raw pointer
+    xFactory->acquire();
+    return xFactory.get();
+}
+
+} // extern C
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/test/sax/testwriter.cxx b/sax/test/sax/testwriter.cxx
new file mode 100644
index 000000000..2a5d3706d
--- /dev/null
+++ b/sax/test/sax/testwriter.cxx
@@ -0,0 +1,662 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#include <vector>
+#include <stdio.h>
+
+#include <com/sun/star/test/XSimpleTest.hpp>
+#include <com/sun/star/lang/XMultiServiceFactory.hpp>
+
+#include <com/sun/star/io/XActiveDataSource.hpp>
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+#include <com/sun/star/xml/sax/XParser.hpp>
+#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
+
+#include <osl/time.h>
+
+#include <cppuhelper/factory.hxx>
+#include <cppuhelper/implbase.hxx>
+
+
+using namespace ::std;
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::test;
+using namespace ::com::sun::star::registry;
+using namespace ::com::sun::star::io;
+using namespace ::com::sun::star::xml::sax;
+
+namespace sax_test {
+
+class OFileWriter : public WeakImplHelper< XOutputStream >
+{
+public:
+    // Keeps at most 255 characters of pcFile and always terminates the copy,
+    // which strncpy alone does not do for a longer name.
+    explicit OFileWriter( const char *pcFile ) : m_f( 0 )
+    { strncpy( m_pcFile, pcFile, sizeof( m_pcFile ) - 1 ); m_pcFile[ sizeof( m_pcFile ) - 1 ] = 0; }
+public: // XOutputStream
+    virtual void SAL_CALL writeBytes(const Sequence< sal_Int8 >& aData)
+        throw (NotConnectedException, BufferSizeExceededException, RuntimeException);
+    virtual void SAL_CALL flush()
+        throw (NotConnectedException, BufferSizeExceededException, RuntimeException);
+    virtual void SAL_CALL closeOutput()
+        throw (NotConnectedException, BufferSizeExceededException, RuntimeException);
+private:
+    char m_pcFile[256];   // file name, zero-terminated
+    FILE *m_f;            // null while no file is open
+};
+
+
+void OFileWriter::writeBytes(const Sequence< sal_Int8 >& aData)
+    throw (NotConnectedException, BufferSizeExceededException, RuntimeException)
+{
+    // The file is opened on the first write ("w" truncates). A file that
+    // cannot be opened is reported instead of passing a null stream to fwrite.
+    if( ! m_f && ! ( m_f = fopen( m_pcFile , "w" ) ) )
+        throw NotConnectedException();
+    fwrite( aData.getConstArray() , 1 , aData.getLength() , m_f );
+}
+
+
+void OFileWriter::flush()
+    throw (NotConnectedException, BufferSizeExceededException, RuntimeException)
+{
+    if( m_f ) fflush( m_f ); // without an open file, fflush(NULL) would flush every open stream
+}
+
+void OFileWriter::closeOutput()
+    throw (NotConnectedException, BufferSizeExceededException, RuntimeException)
+{
+    if( m_f ) fclose( m_f ); // fclose(NULL) is undefined; m_f is null until something was written
+    m_f = 0;
+}
+
+
+class OSaxWriterTest :
+ public WeakImplHelper< XSimpleTest >
+{
+public:
+ explicit OSaxWriterTest( const Reference < XMultiServiceFactory > & rFactory ) : m_rFactory( rFactory )
+ {
+ // the factory is only stored; there is nothing else to set up
+ }
+ ~OSaxWriterTest() {}
+ // XSimpleTest implementation that exercises a SAX writer object.
+ // test() runs the step selected by hTestHandle (0 = testInvariant, 1-4 = the private test* methods) and returns the next handle, or -1 after the last step.
+public:
+ virtual void SAL_CALL testInvariant(
+ const OUString& TestName,
+ const Reference < XInterface >& TestObject)
+ throw ( IllegalArgumentException,
+ RuntimeException);
+
+ virtual sal_Int32 SAL_CALL test(
+ const OUString& TestName,
+ const Reference < XInterface >& TestObject,
+ sal_Int32 hTestHandle)
+ throw ( IllegalArgumentException,RuntimeException);
+
+ virtual sal_Bool SAL_CALL testPassed()
+ throw ( RuntimeException);
+ virtual Sequence< OUString > SAL_CALL getErrors() throw (RuntimeException);
+ virtual Sequence< Any > SAL_CALL getErrorExceptions() throw (RuntimeException);
+ virtual Sequence< OUString > SAL_CALL getWarnings() throw (RuntimeException);
+ // one private method per test step; writeParagraph is a helper used by testSimple
+private:
+ void testSimple( const Reference< XExtendedDocumentHandler > &r );
+ void testExceptions( const Reference< XExtendedDocumentHandler > &r );
+ void testDTD( const Reference< XExtendedDocumentHandler > &r );
+ void testPerformance( const Reference< XExtendedDocumentHandler > &r );
+ void writeParagraph( const Reference< XExtendedDocumentHandler > &r , const OUString & s);
+ // results handed out by getErrorExceptions(), getErrors() and getWarnings()
+private:
+ Sequence<Any> m_seqExceptions;
+ Sequence<OUString> m_seqErrors;
+ Sequence<OUString> m_seqWarnings;
+ Reference < XMultiServiceFactory > m_rFactory;
+
+};
+
+
+/*----------------------------------------
+*
+* Attributelist implementation
+*
+*----------------------------------------*/
+struct AttributeListImpl_impl;
+// XAttributeList implementation that is filled through addAttribute().
+// The attributes live in a separately allocated AttributeListImpl_impl,
+// which the constructors create and the destructor deletes.
+class AttributeListImpl : public WeakImplHelper< XAttributeList >
+{
+public:
+    AttributeListImpl();
+    AttributeListImpl( const AttributeListImpl & );
+    // m_pImpl is owned; a compiler-generated assignment would share it and delete it twice
+    AttributeListImpl & operator=( const AttributeListImpl & ) = delete;
+    ~AttributeListImpl();
+public: // XAttributeList
+    virtual sal_Int16 SAL_CALL getLength() throw (RuntimeException);
+    virtual OUString SAL_CALL getNameByIndex(sal_Int16 i) throw (RuntimeException);
+    virtual OUString SAL_CALL getTypeByIndex(sal_Int16 i) throw (RuntimeException);
+    virtual OUString SAL_CALL getTypeByName(const OUString& aName) throw (RuntimeException);
+    virtual OUString SAL_CALL getValueByIndex(sal_Int16 i) throw (RuntimeException);
+    virtual OUString SAL_CALL getValueByName(const OUString& aName) throw (RuntimeException);
+public: // filling the list
+    void addAttribute( const OUString &sName , const OUString &sType , const OUString &sValue );
+    void clear();
+private:
+    struct AttributeListImpl_impl *m_pImpl;
+};
+
+
+// Name, type and value of a single attribute.
+struct TagAttribute
+{
+    TagAttribute(){}
+    // The parameters are spelled s_* so that they do not shadow the members
+    // they initialise.
+    TagAttribute( const OUString &s_Name,
+                  const OUString &s_Type ,
+                  const OUString &s_Value )
+        : sName(s_Name), sType(s_Type), sValue(s_Value)
+    {
+    }
+    OUString sName;
+    OUString sType;
+    OUString sValue;
+};
+
+// Storage behind AttributeListImpl: the attributes in insertion order.
+struct AttributeListImpl_impl
+{
+    // Room for 20 attributes is reserved up front so that adding them
+    // does not have to reallocate each time.
+    AttributeListImpl_impl() { vecAttribute.reserve(20); }
+
+    vector<TagAttribute> vecAttribute;
+};
+
+
+sal_Int16 AttributeListImpl::getLength() throw (RuntimeException)
+{   // the interface counts in sal_Int16, hence the narrowing of the vector size
+    return static_cast< sal_Int16 >( m_pImpl->vecAttribute.size() );
+}
+
+
+// Deep copy: the new list gets its own storage holding r's attributes.
+AttributeListImpl::AttributeListImpl( const AttributeListImpl &r )
+    : m_pImpl( new AttributeListImpl_impl( *r.m_pImpl ) )
+{
+}
+
+OUString AttributeListImpl::getNameByIndex(sal_Int16 i) throw (RuntimeException)
+{
+    // A negative i converts to a huge unsigned value in the comparison, so
+    // every out-of-range index yields the empty string.
+    const bool bValid = i < m_pImpl->vecAttribute.size();
+    return bValid ? m_pImpl->vecAttribute[i].sName : OUString();
+}
+
+
+OUString AttributeListImpl::getTypeByIndex(sal_Int16 i) throw (RuntimeException)
+{
+    // out of range (a negative i compares as a huge unsigned value): empty string
+    if( !( i < m_pImpl->vecAttribute.size() ) )
+        return OUString();
+    return m_pImpl->vecAttribute[i].sType;
+}
+
+OUString AttributeListImpl::getValueByIndex(sal_Int16 i) throw (RuntimeException)
+{
+    // A negative i converts to a huge unsigned value in the comparison, so
+    // every out-of-range index yields the empty string.
+    if( !( i < m_pImpl->vecAttribute.size() ) )
+        return OUString();
+    return m_pImpl->vecAttribute[i].sValue;
+}
+
+OUString AttributeListImpl::getTypeByName( const OUString& sName ) throw (RuntimeException)
+{
+    // linear search; the first attribute carrying the name wins
+    for (const TagAttribute& rEntry : m_pImpl->vecAttribute)
+        if (rEntry.sName == sName)
+            return rEntry.sType;
+    return OUString(); // no attribute of that name
+}
+
+OUString AttributeListImpl::getValueByName(const OUString& sName) throw (RuntimeException)
+{
+    // linear search; the first attribute carrying the name wins
+    for (const TagAttribute& rEntry : m_pImpl->vecAttribute)
+        if (rEntry.sName == sName)
+            return rEntry.sValue;
+    return OUString(); // no attribute of that name
+}
+
+
+// The list owns its storage: allocated on construction, freed on destruction.
+AttributeListImpl::AttributeListImpl()
+    : m_pImpl( new AttributeListImpl_impl )
+{
+}
+
+AttributeListImpl::~AttributeListImpl()
+{
+    delete m_pImpl;
+}
+
+
+// Appends a new attribute at the end of the list.
+void AttributeListImpl::addAttribute( const OUString &sName ,
+    const OUString &sType , const OUString &sValue )
+{
+    m_pImpl->vecAttribute.emplace_back( sName , sType , sValue );
+}
+
+// Removes every attribute from the list.
+void AttributeListImpl::clear()
+{
+    m_pImpl->vecAttribute.clear();
+}
+
+
+/**
+* for external binding
+*
+*
+**/
+Reference < XInterface > SAL_CALL OSaxWriterTest_CreateInstance( const Reference < XMultiServiceFactory > & rSMgr ) throw (Exception)
+{
+    // create the test object and hand it out through its XInterface
+    Reference < XInterface > xTest = *new OSaxWriterTest( rSMgr );
+    return xTest;
+}
+
+// Service name under which the writer test is registered.
+OUString OSaxWriterTest_getServiceName( ) throw ()
+{
+    return OUString( "test.com.sun.star.xml.sax.Writer");
+}
+// Implementation name of the writer test.
+OUString OSaxWriterTest_getImplementationName( ) throw ()
+{
+    return OUString( "test.extensions.xml.sax.Writer");
+}
+// NOTE(review): lists the implementation name rather than the service name - confirm this is intended.
+Sequence<OUString> OSaxWriterTest_getSupportedServiceNames( ) throw ()
+{
+    return Sequence<OUString> { OSaxWriterTest_getImplementationName( ) };
+}
+
+
+void OSaxWriterTest::testInvariant( const OUString& TestName,
+ const Reference < XInterface >& TestObject )
+ throw ( IllegalArgumentException, RuntimeException)
+{ // Checks that TestObject offers the three interfaces queried below; any other TestName is passed to BUILD_ERROR.
+ if( OUString("com.sun.star.xml.sax.Writer") == TestName ) {
+ Reference< XDocumentHandler > doc( TestObject , UNO_QUERY );
+ Reference< XExtendedDocumentHandler > ext( TestObject , UNO_QUERY );
+ Reference< XActiveDataSource > source( TestObject , UNO_QUERY );
+ // a reference left empty by the query means the interface is missing
+ ERROR_ASSERT( doc.is() , "XDocumentHandler cannot be queried" );
+ ERROR_ASSERT( ext.is() , "XExtendedDocumentHandler cannot be queried" );
+ ERROR_ASSERT( source.is() , "XActiveDataSource cannot be queried" );
+ }
+ else {
+ BUILD_ERROR( 0 , "wrong test" );
+ }
+}
+
+
+sal_Int32 OSaxWriterTest::test(
+    const OUString& TestName,
+    const Reference < XInterface >& TestObject,
+    sal_Int32 hTestHandle)
+    throw ( IllegalArgumentException,RuntimeException)
+{
+    // Runs the test step selected by hTestHandle against TestObject:
+    // 0 checks the interfaces, 1 to 4 run one writer test each.
+    // Returns the handle of the next step, or -1 once the last step is done.
+    // Exceptions escaping a step are passed to BUILD_ERROR instead of being
+    // propagated. For any other TestName, BUILD_ERROR is invoked and
+    // hTestHandle is returned unchanged.
+    if( !( OUString( "com.sun.star.xml.sax.Writer") == TestName ) )
+    {
+        BUILD_ERROR( 0 , "service not supported by test." );
+        return hTestHandle;
+    }
+
+    try
+    {
+        if( 0 == hTestHandle )
+        {
+            testInvariant( TestName , TestObject );
+        }
+        else
+        {
+            Reference< XExtendedDocumentHandler > writer( TestObject , UNO_QUERY );
+
+            switch( hTestHandle )
+            {
+                case 1: testSimple( writer ); break;
+                case 2: testExceptions( writer ); break;
+                case 3: testDTD( writer ); break;
+                case 4: testPerformance( writer ); break;
+                default: break; // no step is assigned to this handle
+            }
+        }
+    }
+    catch( Exception & e ) {
+        OString o = OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US );
+        BUILD_ERROR( 0 , o.getStr() );
+    }
+    catch( ... )
+    {
+        BUILD_ERROR( 0 , "unknown exception (Exception is not base class)" );
+    }
+
+    // advance to the next step; a handle of 5 or more means all steps ran
+    ++hTestHandle;
+    if( hTestHandle >= 5 )
+        return -1;
+    return hTestHandle;
+}
+
+
+sal_Bool OSaxWriterTest::testPassed() throw (RuntimeException)
+{ // passed means: m_seqErrors holds no entry
+ return m_seqErrors.getLength() == 0;
+}
+
+// Returns the content of m_seqErrors.
+Sequence< OUString > OSaxWriterTest::getErrors() throw (RuntimeException)
+{
+ return m_seqErrors;
+}
+
+// Returns the content of m_seqExceptions.
+Sequence< Any > OSaxWriterTest::getErrorExceptions() throw (RuntimeException)
+{
+ return m_seqExceptions;
+}
+
+// Returns the content of m_seqWarnings.
+Sequence< OUString > OSaxWriterTest::getWarnings() throw (RuntimeException)
+{
+ return m_seqWarnings;
+}
+
+void OSaxWriterTest::writeParagraph(
+    const Reference< XExtendedDocumentHandler > &r ,
+    const OUString & s)
+{
+    // Writes s in chunks that are cut in front of every blank (a blank at
+    // index 0 excepted) and offers the writer a line break before each chunk.
+    const sal_Int32 nMax = s.getLength();
+    sal_Int32 nStart = 0;
+
+    for( sal_Int32 n = 1 ; n < nMax ; n++ ) {
+        if( ' ' == s[n] ) {
+            r->allowLineBreak();
+            r->characters( s.copy( nStart , n - nStart ) );
+            nStart = n;
+        }
+    }
+    // remainder up to the end of s; sized from nMax, so an empty s yields an empty chunk
+    r->allowLineBreak();
+    r->characters( s.copy( nStart , nMax - nStart ) );
+}
+
+
+void OSaxWriterTest::testSimple( const Reference< XExtendedDocumentHandler > &r )
+{
+ OUString testParagraph = OUString(
+ "This is a stupid test to check whether the SAXWriter possibly makes "
+ "line breaks halfway correctly or whether it writes the line to the "
+ "bitter end." );
+ // Writes a small document to "output.xml" through r: nested elements with attributes, text, a CDATA section, a comment and an empty element.
+ OFileWriter *pw = new OFileWriter("output.xml");
+ AttributeListImpl *pList = new AttributeListImpl;
+
+ Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY );
+ Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY );
+
+ Reference< XActiveDataSource > source( r , UNO_QUERY );
+
+ ERROR_ASSERT( ref.is() , "no output stream" );
+ ERROR_ASSERT( source.is() , "no active data source" );
+ // connect the writer to the file before anything is written
+ source->setOutputStream( ref );
+
+ r->startDocument();
+ // rList is passed to every startElement call below
+ pList->addAttribute( OUString( "Arg1" ),
+ OUString( "CDATA") ,
+ OUString( "bla\n u") );
+ pList->addAttribute( OUString( "Arg2") ,
+ OUString( "CDATA") ,
+ OUString( "blub") );
+
+ r->startElement( OUString( "tag1") , rList );
+ r->ignorableWhitespace( OUString() );
+
+ r->characters( OUString( "huhu") );
+ r->ignorableWhitespace( OUString() );
+
+ r->startElement( OUString( "hi") , rList );
+ r->ignorableWhitespace( OUString() );
+
+ // the ampersand must be converted & -> &amp;
+ r->characters( OUString( "&#252;") );
+
+ // Test added for mib: checks whether errors occur during character conversion
+ r->ignorableWhitespace( OUString() );
+ char array[256];
+ for( sal_Int32 n = 32 ; n < 254 ; n ++ ) {
+ array[n-32] = n; // byte values 32 to 253 in ascending order
+ }
+ array[254-32] = 0;
+ r->characters(
+ OStringToOUString( array , RTL_TEXTENCODING_SYMBOL )
+ );
+ r->ignorableWhitespace( OUString() );
+
+ // '>' must not be converted
+ r->startCDATA();
+ r->characters( OUString( ">fsfsdf<") );
+ r->endCDATA();
+ r->ignorableWhitespace( OUString() );
+ // the long text goes out in chunks, see writeParagraph()
+ writeParagraph( r , testParagraph );
+
+
+ r->ignorableWhitespace( OUString() );
+ r->comment( OUString( "This is a comment !") );
+ r->ignorableWhitespace( OUString() );
+
+ r->startElement( OUString( "emptytagtest") , rList );
+ r->endElement( OUString( "emptytagtest") );
+
+ r->endElement( OUString( "hi") );
+ r->ignorableWhitespace( OUString() );
+
+ r->endElement( OUString( "tag1") );
+ r->endDocument();
+
+}
+
+void OSaxWriterTest::testExceptions( const Reference< XExtendedDocumentHandler > & r )
+{
+ // Checks that the writer answers misuse with a SAXException; the output goes to "output2.xml".
+ OFileWriter *pw = new OFileWriter("output2.xml");
+ AttributeListImpl *pList = new AttributeListImpl;
+
+ Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY );
+ Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY );
+
+ Reference< XActiveDataSource > source( r , UNO_QUERY );
+
+ ERROR_ASSERT( ref.is() , "no output stream" );
+ ERROR_ASSERT( source.is() , "no active data source" );
+
+ source->setOutputStream( ref );
+
+ { // startDocument must be called before start element
+ sal_Bool bException = sal_True;
+ try
+ {
+ r->startElement( OUString( "huhu") , rList );
+ bException = sal_False;
+ }
+ catch( SAXException &e )
+ {
+ // expected: bException keeps its initial value sal_True
+ }
+ ERROR_ASSERT( bException , "expected exception not thrown !" );
+ }
+
+ r->startDocument();
+
+ r->startElement( OUString( "huhu") , rList );
+ r->startCDATA();
+ // starting an element while the CDATA section is open must fail as well
+ {
+ sal_Bool bException = sal_True;
+ try{
+ r->startElement( OUString( "huhu") , rList );
+ bException = sal_False;
+ }
+ catch( SAXException &e ) {
+ // expected
+ }
+ ERROR_ASSERT( bException , "expected exception not thrown !" );
+ }
+
+ r->endCDATA();
+ // presumably the control character 4 is the forbidden one (9 and 10 are tab and line feed) - TODO confirm
+ {
+ sal_Unicode array[] = { 'a' , 'b' , 4 , 9 , 10 };
+ OUString o( array , 5 );
+ try
+ {
+ r->characters( o );
+ ERROR_ASSERT( 0 , "Writer allowed to write forbidden characters" );
+ }
+ catch( SAXException & e )
+ {
+ // expected: the writer refused the characters
+ }
+ }
+ r->endElement( OUString( "huhu") );
+
+ r->endDocument();
+}
+
+
+void OSaxWriterTest::testDTD(const Reference< XExtendedDocumentHandler > &r )
+{
+ OFileWriter *pw = new OFileWriter("outputDTD.xml");
+ AttributeListImpl *pList = new AttributeListImpl;
+ // Writes a document consisting of a DOCTYPE line and one empty element to "outputDTD.xml".
+ Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY );
+ Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY );
+
+ Reference< XActiveDataSource > source( r , UNO_QUERY );
+
+ ERROR_ASSERT( ref.is() , "no output stream" );
+ ERROR_ASSERT( source.is() , "no active data source" );
+
+ source->setOutputStream( ref );
+
+ // the DOCTYPE line is handed to the writer through unknown()
+ r->startDocument();
+ r->unknown( OUString( "<!DOCTYPE iCalendar >\n") );
+ r->startElement( OUString( "huhu") , rList );
+
+ r->endElement( OUString( "huhu") );
+ r->endDocument();
+}
+
+void OSaxWriterTest::testPerformance(const Reference< XExtendedDocumentHandler > &r )
+{
+    // Times how long the writer needs for 125 nested elements "tag0" to
+    // "tag124" that each hold 450 <huhu> paragraphs, and prints the elapsed
+    // seconds. The output goes to "testPerformance.xml".
+    OFileWriter *pw = new OFileWriter("testPerformance.xml");
+    AttributeListImpl *pList = new AttributeListImpl;
+
+    const OUString testParagraph(
+        "This is a stupid test to check whether the SAXWriter possibly makes "
+        "line breaks halfway correctly or whether it writes the line to the "
+        "bitter end." );
+
+    Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY );
+    Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY );
+
+    Reference< XActiveDataSource > source( r , UNO_QUERY );
+
+    ERROR_ASSERT( ref.is() , "no output stream" );
+    ERROR_ASSERT( source.is() , "no active data source" );
+
+    source->setOutputStream( ref );
+
+    TimeValue aStartTime, aEndTime;
+    osl_getSystemTime( &aStartTime );
+
+    r->startDocument();
+    // just write a bunch of xml tags !
+    // for performance testing
+    const OUString huhu( "huhu" );
+    const OUString tag( "tag" );
+    const OUString noWhitespace;
+    const sal_Int32 ITERATIONS = 125;
+    for( sal_Int32 i2 = 0 ; i2 < ITERATIONS ; i2 ++ )
+    {
+        r->startElement( tag + OUString::number( i2 ), rList );
+        for( sal_Int32 i = 0 ; i < 450 ; i ++ )
+        {
+            r->ignorableWhitespace( noWhitespace );
+            r->startElement( huhu , rList );
+            r->characters( testParagraph );
+            r->ignorableWhitespace( noWhitespace );
+            r->endElement( huhu );
+        }
+    }
+    // close the tagN elements again, innermost first
+    for( sal_Int32 i2 = ITERATIONS-1 ; i2 >= 0 ; i2-- )
+    {
+        r->ignorableWhitespace( noWhitespace );
+        r->endElement( tag + OUString::number( i2 ) );
+    }
+
+    r->endDocument();
+
+    osl_getSystemTime( &aEndTime );
+
+    const double fStart = aStartTime.Seconds + aStartTime.Nanosec / 1000000000.0;
+    const double fEnd = aEndTime.Seconds + aEndTime.Nanosec / 1000000000.0;
+
+    printf( "Performance writing : %g s\n" , fEnd - fStart );
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/test/saxdemo.cxx b/sax/test/saxdemo.cxx
new file mode 100644
index 000000000..7139d60ef
--- /dev/null
+++ b/sax/test/saxdemo.cxx
@@ -0,0 +1,626 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+// testcomponent - Loads a service and its testcomponent from dlls and performs a test.
+// Expands the dll-names depending on the actual environment.
+// Example : testcomponent com.sun.star.io.Pipe stm
+
+// Therefore the testcode must exist in teststm and the testservice must be named test.com.sun.star.uno.io.Pipe
+
+
+#include <stdio.h>
+#include <vector>
+#include <cstring>
+
+#include <com/sun/star/registry/XImplementationRegistration.hpp>
+#include <com/sun/star/lang/XComponent.hpp>
+
+#include <com/sun/star/xml/sax/SAXParseException.hpp>
+#include <com/sun/star/xml/sax/XParser.hpp>
+#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
+
+#include <com/sun/star/io/XOutputStream.hpp>
+#include <com/sun/star/io/XActiveDataSource.hpp>
+
+#include <cppuhelper/servicefactory.hxx>
+#include <cppuhelper/implbase.hxx>
+
+
+using namespace ::std;
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::registry;
+using namespace ::com::sun::star::xml::sax;
+using namespace ::com::sun::star::io;
+
+
+/************
+ * Sequence of bytes -> InputStream
+ ************/
+class OInputStream : public WeakImplHelper < XInputStream >
+{
+public:
+    explicit OInputStream( const Sequence< sal_Int8 >&seq ) :
+        m_seq( seq ), nPos( 0 )
+    {}
+    // Hands out at most nBytesToRead of the bytes not yet read; a negative request is treated as 0.
+    virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
+        throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
+    {
+        const sal_Int32 nLeft = m_seq.getLength() - nPos;
+        if( nBytesToRead > nLeft ) nBytesToRead = nLeft;
+        if( nBytesToRead < 0 ) nBytesToRead = 0;
+        aData = Sequence< sal_Int8 > ( &(m_seq.getConstArray()[nPos]) , nBytesToRead );
+        nPos += nBytesToRead;
+        return nBytesToRead;
+    }
+    virtual sal_Int32 SAL_CALL readSomeBytes(
+        css::uno::Sequence< sal_Int8 >& aData,
+        sal_Int32 nMaxBytesToRead )
+        throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
+    {
+        return readBytes( aData, nMaxBytesToRead );
+    }
+    // Moves the read position forward by nBytesToSkip, but not past the end of the data.
+    virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip )
+        throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
+    {
+        const sal_Int32 nLeft = m_seq.getLength() - nPos;
+        if( nBytesToSkip > 0 ) nPos += ( nBytesToSkip < nLeft ) ? nBytesToSkip : nLeft;
+    }
+    virtual sal_Int32 SAL_CALL available( )
+        throw(NotConnectedException, IOException, RuntimeException)
+    {
+        return m_seq.getLength() - nPos;
+    }
+    virtual void SAL_CALL closeInput( )
+        throw(NotConnectedException, IOException, RuntimeException)
+    {
+        // not needed
+    }
+    Sequence< sal_Int8> m_seq;   // the data served by the stream
+    sal_Int32 nPos;              // index of the next byte to hand out
+};
+
+
+// Helper : create an input stream from a file
+
+Reference< XInputStream > createStreamFromFile(
+    const char *pcFile )
+{
+    // Reads the whole file into memory. The returned reference stays empty
+    // when the file cannot be opened, sized or read completely.
+    Reference< XInputStream > r;
+    FILE *f = fopen( pcFile , "rb" );
+    if( ! f ) return r;
+    long nLength = -1;
+    if( 0 == fseek( f , 0 , SEEK_END ) )
+        nLength = ftell( f );   // ftell itself reports -1 on failure
+    if( nLength >= 0 && nLength <= 0x7FFFFFFF && 0 == fseek( f , 0 , SEEK_SET ) ) { // Sequence lengths are sal_Int32
+        Sequence<sal_Int8> seqIn( static_cast< sal_Int32 >( nLength ) );
+        if( 0 == nLength || 1 == fread( seqIn.getArray() , nLength , 1 , f ) )
+            r.set( new OInputStream( seqIn ) );
+    }
+    fclose( f );
+    return r;
+}
+
+
+// The document handler, which is needed for the saxparser
+// The Documenthandler for reading sax
+
+class TestDocumentHandler :
+    public WeakImplHelper< XExtendedDocumentHandler , XEntityResolver , XErrorHandler >
+{
+public:
+    // The counters start at zero so that they hold defined values even if
+    // a callback arrives before startDocument().
+    TestDocumentHandler( )
+        : m_iElementCount( 0 ), m_iAttributeCount( 0 )
+        , m_iWhitespaceCount( 0 ), m_iCharCount( 0 )
+    {}
+
+public: // Error handler
+    // Prints a note and passes the error on, wrapped in a SAXException.
+    virtual void SAL_CALL error(const Any& aSAXParseException) throw (SAXException, RuntimeException)
+    {
+        printf( "Error !\n" );
+        throw SAXException(
+            OUString( "error from error handler") ,
+            Reference < XInterface >() ,
+            aSAXParseException );
+    }
+    virtual void SAL_CALL fatalError(const Any& /* aSAXParseException */) throw (SAXException, RuntimeException)
+    {
+        printf( "Fatal Error !\n" );
+    }
+    virtual void SAL_CALL warning(const Any& /* aSAXParseException */) throw (SAXException, RuntimeException)
+    {
+        printf( "Warning !\n" );
+    }
+
+public: // ExtendedDocumentHandler
+    // Resets the statistics for a new document.
+    virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException)
+    {
+        m_iElementCount = 0;
+        m_iAttributeCount = 0;
+        m_iWhitespaceCount =0;
+        m_iCharCount=0;
+        printf( "document started\n" );
+    }
+
+    // Prints the statistics gathered for the document.
+    virtual void SAL_CALL endDocument() throw (SAXException, RuntimeException)
+    {
+        printf( "document finished\n" );
+        printf( "(ElementCount %d),(AttributeCount %d),(WhitespaceCount %d),(CharCount %d)\n",
+                m_iElementCount, m_iAttributeCount, m_iWhitespaceCount , m_iCharCount );
+    }
+    virtual void SAL_CALL startElement(const OUString& /* aName */,
+                                       const Reference< XAttributeList > & xAttribs)
+        throw (SAXException,RuntimeException)
+    {
+        m_iElementCount ++;
+        if( xAttribs.is() ) // an empty reference counts as "no attributes"
+            m_iAttributeCount += xAttribs->getLength();
+    }
+
+    virtual void SAL_CALL endElement(const OUString& /* aName */) throw (SAXException,RuntimeException)
+    {
+        // ignored
+    }
+
+    virtual void SAL_CALL characters(const OUString& aChars) throw (SAXException,RuntimeException)
+    {
+        m_iCharCount += aChars.getLength();
+    }
+    virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) throw (SAXException,RuntimeException)
+    {
+        m_iWhitespaceCount += aWhitespaces.getLength();
+    }
+
+    virtual void SAL_CALL processingInstruction(const OUString& /* aTarget */, const OUString& /* aData */) throw (SAXException,RuntimeException)
+    {
+        // ignored
+    }
+
+    virtual void SAL_CALL setDocumentLocator(const Reference< XLocator> & /* xLocator */)
+        throw (SAXException,RuntimeException)
+    {
+        // ignored
+    }
+
+    // Resolves an external entity by opening sSystemId as a file name.
+    virtual InputSource SAL_CALL resolveEntity(
+        const OUString& sPublicId,
+        const OUString& sSystemId)
+        throw (RuntimeException)
+    {
+        InputSource source;
+        source.sSystemId = sSystemId;
+        source.sPublicId = sPublicId;
+
+        source.aInputStream = createStreamFromFile(
+            OUStringToOString( sSystemId, RTL_TEXTENCODING_ASCII_US).getStr() );
+
+        return source;
+    }
+
+    // The remaining callbacks of the extended handler are accepted and ignored.
+    virtual void SAL_CALL startCDATA() throw (SAXException,RuntimeException)
+    {}
+    virtual void SAL_CALL endCDATA() throw (SAXException,RuntimeException)
+    {}
+    virtual void SAL_CALL comment(const OUString& /* sComment */) throw (SAXException,RuntimeException)
+    {}
+    virtual void SAL_CALL unknown(const OUString& /* sString */) throw (SAXException,RuntimeException)
+    {}
+    virtual void SAL_CALL allowLineBreak() throw (SAXException, RuntimeException )
+    {}
+
+public:
+    int m_iElementCount;
+    int m_iAttributeCount;
+    int m_iWhitespaceCount;
+    int m_iCharCount;
+};
+
+
+// helper implementation for writing
+// implements an XAttributeList
+
+struct AttributeListImpl_impl;
+// XAttributeList implementation that is filled through addAttribute().
+// The attributes live in a separately allocated AttributeListImpl_impl,
+// which the constructors create and the destructor deletes.
+class AttributeListImpl : public WeakImplHelper< XAttributeList >
+{
+public:
+    AttributeListImpl();
+    AttributeListImpl( const AttributeListImpl & );
+    // m_pImpl is owned; a compiler-generated assignment would share it and delete it twice
+    AttributeListImpl & operator=( const AttributeListImpl & ) = delete;
+    ~AttributeListImpl();
+public: // XAttributeList
+    virtual sal_Int16 SAL_CALL getLength() throw (RuntimeException);
+    virtual OUString SAL_CALL getNameByIndex(sal_Int16 i) throw (RuntimeException);
+    virtual OUString SAL_CALL getTypeByIndex(sal_Int16 i) throw (RuntimeException);
+    virtual OUString SAL_CALL getTypeByName(const OUString& aName) throw (RuntimeException);
+    virtual OUString SAL_CALL getValueByIndex(sal_Int16 i) throw (RuntimeException);
+    virtual OUString SAL_CALL getValueByName(const OUString& aName) throw (RuntimeException);
+public: // filling the list
+    void addAttribute( const OUString &sName , const OUString &sType , const OUString &sValue );
+    void clear();
+private:
+    struct AttributeListImpl_impl *m_pImpl;
+};
+
+
+// Name, type and value of a single attribute.
+struct TagAttribute
+{
+    // leaves all three strings default-constructed
+    TagAttribute(){}
+
+    // takes copies of the three strings
+    TagAttribute( const OUString &s_Name,
+                  const OUString &s_Type ,
+                  const OUString &s_Value )
+        : sName(s_Name), sType(s_Type), sValue(s_Value)
+    {}
+    OUString sName;
+    OUString sType;
+    OUString sValue;
+};
+
+// Storage behind AttributeListImpl: the attributes in insertion order.
+struct AttributeListImpl_impl
+{
+    // Room for 20 attributes is reserved up front so that adding them
+    // does not have to reallocate each time.
+    AttributeListImpl_impl() { vecAttribute.reserve(20); }
+
+    vector<TagAttribute> vecAttribute;
+};
+
+
+sal_Int16 AttributeListImpl::getLength() throw (RuntimeException)
+{   // the interface counts in sal_Int16, hence the narrowing of the vector size
+    return static_cast< sal_Int16 >( m_pImpl->vecAttribute.size() );
+}
+
+
+// Deep copy: the new list gets its own storage holding r's attributes.
+AttributeListImpl::AttributeListImpl( const AttributeListImpl &r )
+    : m_pImpl( new AttributeListImpl_impl( *r.m_pImpl ) )
+{
+}
+
+OUString AttributeListImpl::getNameByIndex(sal_Int16 i) throw (RuntimeException)
+{
+    // i is signed: a negative value must be rejected too, or it would index in front of the vector
+    if( i >= 0 && i < sal::static_int_cast<sal_Int16>(m_pImpl->vecAttribute.size()) )
+        return m_pImpl->vecAttribute[i].sName;
+    return OUString();
+}
+
+
+OUString AttributeListImpl::getTypeByIndex(sal_Int16 i) throw (RuntimeException)
+{
+    // i is signed: a negative value must be rejected too, or it would index in front of the vector
+    if( i >= 0 && i < sal::static_int_cast<sal_Int16>(m_pImpl->vecAttribute.size()) )
+        return m_pImpl->vecAttribute[i].sType;
+    return OUString();
+}
+
+OUString AttributeListImpl::getValueByIndex(sal_Int16 i) throw (RuntimeException)
+{
+    // i is signed: a negative value must be rejected too, or it would index
+    // in front of the vector
+    if( i >= 0 && i < sal::static_int_cast<sal_Int16>(m_pImpl->vecAttribute.size()) )
+        return m_pImpl->vecAttribute[i].sValue;
+    return OUString();
+}
+
+OUString AttributeListImpl::getTypeByName( const OUString& sName ) throw (RuntimeException)
+{
+    // linear search; the first attribute carrying the name wins
+    for (const TagAttribute& rEntry : m_pImpl->vecAttribute)
+        if (rEntry.sName == sName)
+            return rEntry.sType;
+    return OUString(); // no attribute of that name
+}
+
+OUString AttributeListImpl::getValueByName(const OUString& sName) throw (RuntimeException)
+{
+    // linear search; the first attribute carrying the name wins
+    for (const TagAttribute& rEntry : m_pImpl->vecAttribute)
+        if (rEntry.sName == sName)
+            return rEntry.sValue;
+    return OUString(); // no attribute of that name
+}
+
+
+// The list owns its storage: allocated on construction, freed on destruction.
+AttributeListImpl::AttributeListImpl()
+    : m_pImpl( new AttributeListImpl_impl )
+{
+}
+
+AttributeListImpl::~AttributeListImpl()
+{
+    delete m_pImpl;
+}
+
+
+// Appends a new attribute at the end of the list.
+void AttributeListImpl::addAttribute( const OUString &sName ,
+    const OUString &sType , const OUString &sValue )
+{
+    m_pImpl->vecAttribute.emplace_back( sName , sType , sValue );
+}
+// Removes every attribute from the list.
+void AttributeListImpl::clear()
+{
+    m_pImpl->vecAttribute.clear();
+}
+
+
+// helper function for writing
+// ensures that linebreaks are inserted
+// when writing a long text.
+// Note: this implementation may be a bit slow,
+// but it shows, how the SAX-Writer handles the allowLineBreak calls.
+
+void writeParagraphHelper(
+    const Reference< XExtendedDocumentHandler > &r ,
+    const OUString & s)
+{
+    // Writes s in chunks that are cut in front of every blank (a blank at
+    // index 0 excepted) and offers the writer a line break before each chunk.
+    const sal_Int32 nMax = s.getLength();
+    sal_Int32 nStart = 0;
+
+    for( sal_Int32 n = 1 ; n < nMax ; n++ ) {
+        if( ' ' == s[n] ) {
+            r->allowLineBreak();
+            r->characters( s.copy( nStart , n - nStart ) );
+            nStart = n;
+        }
+    }
+    // Remainder up to the end of s. The length is taken from nMax, so an
+    // empty s yields an empty chunk instead of a copy beyond its end.
+    r->allowLineBreak();
+    r->characters( s.copy( nStart , nMax - nStart ) );
+}
+
+
+// helper implementation for SAX-Writer
+// writes data to a file
+
+class OFileWriter : public WeakImplHelper< XOutputStream >
+{
+public:
+    // Keeps at most 255 characters of pcFile and always terminates the copy,
+    // which strncpy alone does not do for a longer name.
+    explicit OFileWriter( const char *pcFile ) : m_f( 0 )
+    { strncpy( m_pcFile, pcFile, sizeof( m_pcFile ) - 1 ); m_pcFile[ sizeof( m_pcFile ) - 1 ] = 0; }
+public: // XOutputStream
+    virtual void SAL_CALL writeBytes(const Sequence< sal_Int8 >& aData)
+        throw (NotConnectedException, BufferSizeExceededException, RuntimeException);
+    virtual void SAL_CALL flush()
+        throw (NotConnectedException, BufferSizeExceededException, RuntimeException);
+    virtual void SAL_CALL closeOutput()
+        throw (NotConnectedException, BufferSizeExceededException, RuntimeException);
+private:
+    char m_pcFile[256];   // file name, zero-terminated
+    FILE *m_f;            // null while no file is open
+};
+
+
+void OFileWriter::writeBytes(const Sequence< sal_Int8 >& aData)
+    throw (NotConnectedException, BufferSizeExceededException, RuntimeException)
+{
+    // The file is opened on the first write ("w" truncates). A file that
+    // cannot be opened is reported instead of passing a null stream to fwrite.
+    if( ! m_f && ! ( m_f = fopen( m_pcFile , "w" ) ) )
+        throw NotConnectedException();
+    fwrite( aData.getConstArray() , 1 , aData.getLength() , m_f );
+}
+
+
+void OFileWriter::flush()
+    throw (NotConnectedException, BufferSizeExceededException, RuntimeException)
+{
+    if( m_f ) fflush( m_f ); // without an open file, fflush(NULL) would flush every open stream
+}
+
+void OFileWriter::closeOutput()
+    throw (NotConnectedException, BufferSizeExceededException, RuntimeException)
+{
+    if( m_f ) fclose( m_f ); // fclose(NULL) is undefined; m_f is null until something was written
+    m_f = 0;
+}
+
+
+// Needed to switch on solaris threads
+#ifdef __sun
+extern "C" void ChangeGlobalInit();
+#endif
+int main (int argc, char **argv)
+{
+    // Demo: parses the file argv[1] with the SAX parser service, printing
+    // some statistics, and then writes a sample document to the file
+    // argv[2] with the SAX writer service.
+    if( argc < 3) {
+        printf( "usage : saxdemo inputfile outputfile\n" );
+        exit( 0 );
+    }
+#ifdef __sun
+    // switch on threads in solaris
+    ChangeGlobalInit();
+#endif
+
+    // create service manager
+    Reference< XMultiServiceFactory > xSMgr = createRegistryServiceFactory(
+        OUString( "applicat.rdb" ) );
+
+    Reference < XImplementationRegistration > xReg;
+    try
+    {
+        // Create registration service
+        Reference < XInterface > x = xSMgr->createInstance( "com.sun.star.registry.ImplementationRegistration" );
+        xReg.set( x , UNO_QUERY );
+    }
+    catch( Exception & ) {
+        // reported below, like a service that could not be instantiated
+    }
+    if( ! xReg.is() ) {
+        printf( "Couldn't create ImplementationRegistration service\n" );
+        exit(1);
+    }
+
+    try
+    {
+        // Load dll for the tested component
+        OUString aDllName( "sax.uno" SAL_DLLEXTENSION );
+        xReg->registerImplementation(
+            OUString("com.sun.star.loader.SharedLibrary"),
+            aDllName,
+            Reference< XSimpleRegistry > () );
+    }
+    catch( Exception &e ) {
+        printf( "Couldn't reach sax dll\n" );
+        printf( "%s\n" , OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US ).getStr() );
+        exit(1);
+    }
+
+
+    // parser demo
+    // read xml from a file and count elements
+
+    Reference< XInterface > x = xSMgr->createInstance( "com.sun.star.xml.sax.Parser" );
+    Reference< XParser > rParser( x , UNO_QUERY );
+    if( rParser.is() )
+    {
+        // create and connect the document handler to the parser
+        TestDocumentHandler *pDocHandler = new TestDocumentHandler( );
+
+        Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler );
+        Reference< XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler );
+
+        rParser->setDocumentHandler( rDocHandler );
+        rParser->setEntityResolver( rEntityResolver );
+
+        // create the input stream
+        InputSource source;
+        source.aInputStream = createStreamFromFile( argv[1] );
+        source.sSystemId = OUString::createFromAscii( argv[1] );
+
+        try
+        {
+            // start parsing
+            rParser->parseStream( source );
+        }
+        catch( Exception & e )
+        {
+            OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 );
+            printf( "Exception during parsing : %s\n" , o1.getStr() );
+        }
+    }
+    else
+    {
+        printf( "couldn't create sax-parser component\n" );
+    }
+
+    // The SAX-Writer demo
+
+    x= xSMgr->createInstance("com.sun.star.xml.sax.Writer");
+    Reference< XActiveDataSource > source( x , UNO_QUERY );
+    Reference< XExtendedDocumentHandler > r( x , UNO_QUERY );
+    if( source.is() && r.is() )
+    {
+        printf( "start writing to %s\n" , argv[2] );
+
+        OFileWriter *pw = new OFileWriter( argv[2] );
+        source->setOutputStream( Reference< XOutputStream> ( (XOutputStream*) pw ) );
+
+        AttributeListImpl *pList = new AttributeListImpl;
+        Reference< XAttributeList > rList( (XAttributeList *) pList );
+
+        r->startDocument();
+
+        pList->addAttribute( OUString( "Arg1" ),
+                             OUString( "CDATA") ,
+                             OUString( "foo\n u") );
+        pList->addAttribute( OUString( "Arg2") ,
+                             OUString( "CDATA") ,
+                             OUString( "foo2") );
+
+        r->startElement( OUString( "tag1") , rList );
+        // tells the writer to insert a linefeed
+        r->ignorableWhitespace( OUString() );
+
+        r->characters( OUString( "huhu") );
+        r->ignorableWhitespace( OUString() );
+
+        r->startElement( OUString( "hi") , rList );
+        r->ignorableWhitespace( OUString() );
+
+        // the ampersand must be converted & -> &amp;
+        r->characters( OUString( "&#252;") );
+        r->ignorableWhitespace( OUString() );
+
+        // '>' must not be converted
+        r->startCDATA();
+        r->characters( OUString( " > foo < ") );
+        r->endCDATA();
+        r->ignorableWhitespace( OUString() );
+
+        OUString testParagraph = OUString(
+            "This is only a test to check, if the writer inserts line feeds "
+            "if needed or if the writer puts the whole text into one line." );
+        writeParagraphHelper( r , testParagraph );
+
+        r->ignorableWhitespace( OUString() );
+        r->comment( OUString( "This is a comment !") );
+        r->ignorableWhitespace( OUString() );
+
+        r->startElement( OUString( "emptytagtest") , rList );
+        r->endElement( OUString( "emptytagtest") );
+        r->ignorableWhitespace( OUString() );
+
+        r->endElement( OUString( "hi") );
+        r->ignorableWhitespace( OUString() );
+
+        r->endElement( OUString( "tag1") );
+        r->endDocument();
+
+        printf( "finished writing\n" );
+    }
+    else
+    {
+        printf( "couldn't create sax-writer component\n" );
+    }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sax/test/testcomponent.cxx b/sax/test/testcomponent.cxx
new file mode 100644
index 000000000..51f8f244e
--- /dev/null
+++ b/sax/test/testcomponent.cxx
@@ -0,0 +1,223 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+// testcomponent - Loads a service and its test component from dlls and performs a test.
+// Expands the dll names depending on the actual environment.
+// Example : testcomponent com.sun.star.io.Pipe stm
+
+// Therefore the test code must exist in teststm and the test service must be named test.com.sun.star.io.Pipe
+
+
+#include <stdio.h>
+#include <com/sun/star/registry/XImplementationRegistration.hpp>
+#include <com/sun/star/lang/XComponent.hpp>
+
+#include <com/sun/star/test/XSimpleTest.hpp>
+
+#include <cppuhelper/servicefactory.hxx>
+
+
+// Bring the cppu helper namespace and the UNO API namespaces into scope so
+// that the names used in main() below can be written unqualified.
+using namespace ::cppu;
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::test;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::registry;
+
+// Needed to switch on solaris threads.
+// The function is only declared (with C linkage) when __sun is defined;
+// main() calls it under the same #ifdef before doing any other work.
+#ifdef __sun
+extern "C" void ChangeGlobalInit();
+#endif
+
+// Expands a bare module name (e.g. "stm") to the shared library file name used
+// on the current platform: unchanged on Windows, "lib<name>.so" everywhere else.
+static OUString lcl_getLibraryName( const char * pModule )
+{
+#ifdef _WIN32
+    OUString aDllName = OStringToOUString( pModule , RTL_TEXTENCODING_ASCII_US );
+#else
+    OUString aDllName = "lib";
+    aDllName += OStringToOUString( pModule , RTL_TEXTENCODING_ASCII_US );
+    aDllName += ".so";
+#endif
+    return aDllName;
+}
+
+// Reports the library whose registration failed.
+static void lcl_reportUnreachableDll( const OUString & rDllName )
+{
+    printf( "Couldn't reach dll %s\n" ,
+            OUStringToOString( rDllName , RTL_TEXTENCODING_ASCII_US ).getStr() );
+}
+
+/** Test driver: registers the libraries named on the command line, then runs
+    the test service "test.<service>" against fresh instances of <service>
+    until the test service returns the handle -1.
+
+    @param argc  number of command line arguments; at least 3 are required
+    @param argv  argv[1] is the name of the service under test, argv[2] the
+                 library implementing it (the test code is loaded from
+                 "test" + argv[2]), argv[3..] are additional libraries
+
+    @return 0 after the test loop has finished (also 0 when only the usage
+            text was printed); the process exits with 1 when setup fails or
+            the test throws.
+            NOTE(review): a failed test is only reported on stdout, the exit
+            status stays 0 - confirm whether callers rely on that.
+*/
+int main (int argc, char **argv)
+{
+
+    if( argc < 3) {
+        printf( "usage : testcomponent service dll [additional dlls]\n" );
+        exit( 0 );
+    }
+#ifdef __sun
+    // switch on threads in solaris
+    ChangeGlobalInit();
+#endif
+
+    // create service manager
+    Reference< XMultiServiceFactory > xSMgr =
+        createRegistryServiceFactory( OUString( "applicat.rdb") );
+    if( ! xSMgr.is() )
+    {
+        // everything below dereferences the service manager
+        printf( "Couldn't create service manager\n" );
+        exit(1);
+    }
+
+    Reference < XImplementationRegistration > xReg;
+    // never assigned: an empty reference is handed to registerImplementation()
+    Reference < XSimpleRegistry > xSimpleReg;
+
+    try
+    {
+        // Create registration service
+        Reference < XInterface > x = xSMgr->createInstance( "com.sun.star.registry.ImplementationRegistration" );
+        xReg.set( x , UNO_QUERY );
+    }
+    catch (const Exception&)
+    {
+        printf( "Couldn't create ImplementationRegistration service\n" );
+        exit(1);
+    }
+    if( ! xReg.is() )
+    {
+        // createInstance() did not throw, but the query for the interface failed
+        printf( "Couldn't create ImplementationRegistration service\n" );
+        exit(1);
+    }
+
+    OString sTestName;
+    // Library currently being registered. Declared outside the try blocks so
+    // that the exception handlers can name the library that failed.
+    OUString aDllName;
+
+    try
+    {
+        // Load dll for the tested component
+        for( int n = 2 ; n < argc ; ++n ) {
+            aDllName = lcl_getLibraryName( argv[n] );
+            xReg->registerImplementation(
+                OUString("com.sun.star.loader.SharedLibrary"),
+                aDllName,
+                xSimpleReg );
+        }
+    }
+    catch (const Exception &e)
+    {
+        lcl_reportUnreachableDll( aDllName );
+        printf( "%s\n" , OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US ).getStr() );
+
+        exit(1);
+    }
+
+
+    try
+    {
+        // Load dll for the test component
+        sTestName = "test";
+        sTestName += argv[2];
+        aDllName = lcl_getLibraryName( sTestName.getStr() );
+
+        xReg->registerImplementation(
+            OUString("com.sun.star.loader.SharedLibrary") ,
+            aDllName,
+            xSimpleReg );
+    }
+    catch (const Exception&)
+    {
+        lcl_reportUnreachableDll( aDllName );
+        exit(1);
+    }
+
+
+    // Instantiate test service
+    sTestName = "test.";
+    sTestName += argv[1];
+
+    Reference < XInterface > xIntTest =
+        xSMgr->createInstance( OStringToOUString( sTestName , RTL_TEXTENCODING_ASCII_US ) );
+    Reference< XSimpleTest > xTest( xIntTest , UNO_QUERY );
+
+    if( ! xTest.is() ) {
+        printf( "Couldn't instantiate test service \n" );
+        exit( 1 );
+    }
+
+
+    sal_Int32 nHandle = 0;
+    // number of errors/warnings already printed in earlier iterations
+    sal_Int32 nErrorCount = 0;
+    sal_Int32 nWarningCount = 0;
+
+    // loop until all test are performed
+    while( nHandle != -1 )
+    {
+        // Instantiate service
+        Reference< XInterface > x =
+            xSMgr->createInstance( OStringToOUString( argv[1] , RTL_TEXTENCODING_ASCII_US ) );
+        if( ! x.is() )
+        {
+            printf( "Couldn't instantiate service !\n" );
+            exit( 1 );
+        }
+
+        // do the test; the returned handle selects the next step (-1 ends the loop)
+        sal_Int32 nNewHandle = -1;
+        try
+        {
+            nNewHandle = xTest->test(
+                OStringToOUString( argv[1] , RTL_TEXTENCODING_ASCII_US ) , x , nHandle );
+        }
+        catch (const Exception &e)
+        {
+            OString o = OUStringToOString( e.Message, RTL_TEXTENCODING_ASCII_US );
+            printf( "testcomponent : uncaught exception %s\n" , o.getStr() );
+            exit(1);
+        }
+        catch (...)
+        {
+            printf( "testcomponent : uncaught unknown exception\n" );
+            exit(1);
+        }
+
+
+        // print errors and warning; only entries beyond the counts already
+        // printed are shown
+        Sequence<OUString> seqErrors = xTest->getErrors();
+        Sequence<OUString> seqWarnings = xTest->getWarnings();
+        if( seqWarnings.getLength() > nWarningCount )
+        {
+            printf( "Warnings during test %" SAL_PRIxUINT32 "!\n" , nHandle );
+            for( ; nWarningCount < seqWarnings.getLength() ; nWarningCount ++ )
+            {
+                OString o = OUStringToOString(
+                    seqWarnings.getArray()[nWarningCount], RTL_TEXTENCODING_ASCII_US );
+                printf( "Warning\n%s\n---------\n" , o.getStr() );
+            }
+        }
+
+
+        if( seqErrors.getLength() > nErrorCount ) {
+            printf( "Errors during test %" SAL_PRIxUINT32 "!\n" , nHandle );
+            for( ; nErrorCount < seqErrors.getLength() ; nErrorCount ++ ) {
+                OString o = OUStringToOString(
+                    seqErrors.getArray()[nErrorCount], RTL_TEXTENCODING_ASCII_US );
+                printf( "%s\n" , o.getStr() );
+            }
+        }
+
+        nHandle = nNewHandle;
+    }
+
+    if( xTest->testPassed() ) {
+        printf( "Test passed !\n" );
+    }
+    else {
+        printf( "Test failed !\n" );
+    }
+
+    // shut the service manager down, if it supports XComponent
+    Reference <XComponent > rComp( xSMgr , UNO_QUERY );
+    if( rComp.is() )
+        rComp->dispose();
+    return 0;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */