diff options
Diffstat (limited to 'sax')
45 files changed, 13363 insertions, 0 deletions
diff --git a/sax/CppunitTest_sax.mk b/sax/CppunitTest_sax.mk new file mode 100644 index 0000000000..282ba2414e --- /dev/null +++ b/sax/CppunitTest_sax.mk @@ -0,0 +1,30 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# + +$(eval $(call gb_CppunitTest_CppunitTest,sax)) + +$(eval $(call gb_CppunitTest_use_sdk_api,sax)) + +$(eval $(call gb_CppunitTest_use_external,sax,boost_headers)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax)) + +$(eval $(call gb_CppunitTest_use_libraries,sax, \ + sax \ + sal \ + comphelper \ + cppu \ +)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sax, \ + sax/qa/cppunit/test_converter \ +)) + + +# vim: set noet sw=4 ts=4: diff --git a/sax/CppunitTest_sax_attributes.mk b/sax/CppunitTest_sax_attributes.mk new file mode 100644 index 0000000000..198629814f --- /dev/null +++ b/sax/CppunitTest_sax_attributes.mk @@ -0,0 +1,29 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_CppunitTest_CppunitTest,sax_attributes)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax_attributes)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sax_attributes, \ + sax/qa/cppunit/attributes \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sax_attributes, \ + cppu \ + cppuhelper \ + sal \ + sax \ +)) + +$(eval $(call gb_CppunitTest_use_sdk_api,sax_attributes)) + +$(eval $(call gb_CppunitTest_use_ure,sax_attributes)) + +# vim: set noet sw=4 ts=4: diff --git a/sax/CppunitTest_sax_parser.mk b/sax/CppunitTest_sax_parser.mk new file mode 100644 index 0000000000..27dfc15fed --- /dev/null +++ b/sax/CppunitTest_sax_parser.mk @@ -0,0 +1,48 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_CppunitTest_CppunitTest,sax_parser)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax_parser)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sax_parser, \ + sax/qa/cppunit/parser \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sax_parser, \ + comphelper \ + cppu \ + cppuhelper \ + unotest \ + sal \ + sax \ + test \ +)) + +$(eval $(call gb_CppunitTest_use_externals,sax_parser, \ + boost_headers \ +)) + +$(eval $(call gb_CppunitTest_use_sdk_api,sax_parser)) + +$(eval $(call gb_CppunitTest_use_ure,sax_parser)) +$(eval $(call gb_CppunitTest_use_vcl,sax_parser)) + +$(eval $(call gb_CppunitTest_use_components,sax_parser,\ + configmgr/source/configmgr \ + framework/util/fwk \ + i18npool/util/i18npool \ + sfx2/util/sfx \ + ucb/source/core/ucb1 \ + ucb/source/ucp/file/ucpfile1 \ +)) + +$(eval $(call gb_CppunitTest_use_configuration,sax_parser)) + +# vim: set noet sw=4 ts=4: diff --git a/sax/CppunitTest_sax_xmlimport.mk b/sax/CppunitTest_sax_xmlimport.mk new file mode 100644 index 0000000000..2e10736329 --- /dev/null +++ b/sax/CppunitTest_sax_xmlimport.mk @@ -0,0 +1,57 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_CppunitTest_CppunitTest,sax_xmlimport)) + +$(eval $(call gb_CppunitTest_use_common_precompiled_header,sax_xmlimport)) + +$(eval $(call gb_CppunitTest_add_exception_objects,sax_xmlimport, \ +sax/qa/cppunit/xmlimport \ +)) + +$(eval $(call gb_CppunitTest_use_libraries,sax_xmlimport, \ + test \ + cppu \ + comphelper \ + sal \ + cppuhelper \ + sax \ + unotest \ + utl \ + salhelper \ +)) + +$(eval $(call gb_CppunitTest_use_api,sax_xmlimport,\ + offapi \ + udkapi \ +)) + +$(eval $(call gb_CppunitTest_use_externals,sax_xmlimport, \ + boost_headers \ +)) + +$(eval $(call gb_CppunitTest_use_ure,sax_xmlimport)) +$(eval $(call gb_CppunitTest_use_vcl,sax_xmlimport)) + +$(eval $(call gb_CppunitTest_set_include,sax_xmlimport,\ + -I$(SRCDIR)/sax/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_CppunitTest_use_components,sax_xmlimport,\ + configmgr/source/configmgr \ + sax/source/expatwrap/expwrap \ + ucb/source/core/ucb1 \ + ucb/source/ucp/file/ucpfile1 \ + uui/util/uui \ +)) + +$(eval $(call gb_CppunitTest_use_configuration,sax_xmlimport)) + +# vim: set noet sw=4 ts=4: diff --git a/sax/IwyuFilter_sax.yaml b/sax/IwyuFilter_sax.yaml new file mode 100644 index 0000000000..986dcdc988 --- /dev/null +++ b/sax/IwyuFilter_sax.yaml @@ -0,0 +1,2 @@ +--- +assumeFilename: sax/source/fastparser/fastparser.cxx diff --git a/sax/Library_sax.mk b/sax/Library_sax.mk new file mode 100644 index 0000000000..97389a2bb0 --- /dev/null +++ b/sax/Library_sax.mk @@ -0,0 +1,56 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Library_Library,sax)) + +$(eval $(call gb_Library_set_componentfile,sax,sax/source/expatwrap/expwrap,services)) + +$(eval $(call gb_Library_set_include,sax,\ + -I$(SRCDIR)/sax/inc \ + $$(INCLUDE) \ +)) + +$(eval $(call gb_Library_use_external,sax,boost_headers)) + +$(eval $(call gb_Library_use_common_precompiled_header,sax)) + +$(eval $(call gb_Library_use_sdk_api,sax)) + +$(eval $(call gb_Library_use_externals,sax,\ + boost_headers \ + libxml2 \ + expat \ +)) + +$(eval $(call gb_Library_use_libraries,sax,\ + comphelper \ + cppu \ + cppuhelper \ + sal \ + salhelper \ + tl \ +)) + +$(eval $(call gb_Library_add_defs,sax,\ + -DSAX_DLLIMPLEMENTATION \ +)) + +$(eval $(call gb_Library_add_exception_objects,sax,\ + sax/source/expatwrap/sax_expat \ + sax/source/expatwrap/saxwriter \ + sax/source/expatwrap/xml2utf \ + sax/source/fastparser/fastparser \ + sax/source/fastparser/legacyfastparser \ + sax/source/tools/converter \ + sax/source/tools/fastattribs \ + sax/source/tools/fastserializer \ + sax/source/tools/fshelper \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/sax/Makefile b/sax/Makefile new file mode 100644 index 0000000000..ccb1c85a04 --- /dev/null +++ b/sax/Makefile @@ -0,0 +1,7 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- + +module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST)))) + +include $(module_directory)/../solenv/gbuild/partial_build.mk + +# vim: set noet sw=4 ts=4: diff --git a/sax/Module_sax.mk b/sax/Module_sax.mk new file mode 100644 index 0000000000..2c0b9596a8 --- /dev/null +++ b/sax/Module_sax.mk @@ -0,0 +1,23 @@ +# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# + +$(eval $(call gb_Module_Module,sax)) + +$(eval $(call gb_Module_add_targets,sax,\ + Library_sax \ +)) + +$(eval $(call gb_Module_add_check_targets,sax,\ + CppunitTest_sax \ + CppunitTest_sax_attributes \ + CppunitTest_sax_parser \ + CppunitTest_sax_xmlimport \ +)) + +# vim: set noet sw=4 ts=4: diff --git a/sax/README.md b/sax/README.md new file mode 100644 index 0000000000..30ae23197a --- /dev/null +++ b/sax/README.md @@ -0,0 +1,15 @@ +# UNO Services for SAX + +UNO services for SAX parsing and C++ functions for XMLSchema-2 data types. + +* `source/expatwrap`: + string-based SAX parser UNO service wrapping expat +* `source/fastparser`: + multi-threaded token-based SAX parser UNO service wrapping libxml2 +* `source/tools`: + + C++ wrapper for fast SAX parser + + C++ XMLSchema-2 data type conversion helpers + +Multi-threading in FastParser can be disabled for debugging purposes with: + + SAX_DISABLE_THREADS=1 SAL_LOG="+INFO.sax.fastparser+WARN" diff --git a/sax/README.vars b/sax/README.vars new file mode 100644 index 0000000000..35d0bccd03 --- /dev/null +++ b/sax/README.vars @@ -0,0 +1,5 @@ +Environment variables in sax: + +- fastparser: + + - SAX_DISABLE_THREADS - don't thread sax parsing diff --git a/sax/inc/xml2utf.hxx b/sax/inc/xml2utf.hxx new file mode 100644 index 0000000000..ead6ac1143 --- /dev/null +++ b/sax/inc/xml2utf.hxx @@ -0,0 +1,129 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SAX_INC_XML2UTF_HXX +#define INCLUDED_SAX_INC_XML2UTF_HXX + +#include <sal/config.h> + +#include <memory> + +#include <sal/types.h> +#include <rtl/string.hxx> + +#include <com/sun/star/io/XInputStream.hpp> + +namespace sax_expatwrap { + +class Text2UnicodeConverter +{ + +public: + Text2UnicodeConverter( const OString & sEncoding ); + ~Text2UnicodeConverter(); + + css::uno::Sequence < sal_Unicode > convert( const css::uno::Sequence<sal_Int8> & ); + bool canContinue() const { return m_bCanContinue; } + +private: + void init( rtl_TextEncoding encoding ); + + rtl_TextToUnicodeConverter m_convText2Unicode; + rtl_TextToUnicodeContext m_contextText2Unicode; + bool m_bCanContinue; + bool m_bInitialized; + css::uno::Sequence<sal_Int8> m_seqSource; +}; + +/*---------------------------------------- +* +* Unicode2TextConverter +* +**-----------------------------------------*/ +class Unicode2TextConverter +{ +public: + Unicode2TextConverter( rtl_TextEncoding encoding ); + ~Unicode2TextConverter(); + + css::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength ); + +private: + rtl_UnicodeToTextConverter m_convUnicode2Text; + rtl_UnicodeToTextContext m_contextUnicode2Text; + css::uno::Sequence<sal_Unicode> m_seqSource; +}; + + +/*---------------------------------------- +* +* XMLFile2UTFConverter +* +**-----------------------------------------*/ +class XMLFile2UTFConverter +{ +public: + XMLFile2UTFConverter( ): + m_bStarted( false ) + {} + + void setInputStream( css::uno::Reference< css::io::XInputStream > const &r ) { m_in = r; } + void setEncoding( const OString &s ) { 
m_sEncoding = s; } + + + // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There + // may be returned less or more bytes than ordered. + /// @throws css::io::IOException + /// @throws css::io::NotConnectedException + /// @throws css::io::BufferSizeExceededException + /// @throws css::uno::RuntimeException + sal_Int32 readAndConvert( css::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ); + +private: + + // Called only on first Sequence of bytes. Tries to figure out file format and encoding information. + // @return TRUE, when encoding information could be retrieved + // @return FALSE, when no encoding information was found in file + bool scanForEncoding( css::uno::Sequence<sal_Int8> &seq ); + + // Called only on first Sequence of bytes. Tries to figure out + // if enough data is available to scan encoding + // @return TRUE, when encoding is retrievable + // @return FALSE, when more data is needed + static bool isEncodingRecognizable( const css::uno::Sequence< sal_Int8 > & seq ); + + // When encoding attribute is within the text (in the first line), it is removed. + static void removeEncoding( css::uno::Sequence<sal_Int8> &seq ); + + // Initializes decoding depending on m_sEncoding setting + void initializeDecoding(); +private: + css::uno::Reference< css::io::XInputStream > m_in; + + bool m_bStarted; + OString m_sEncoding; + + std::unique_ptr<Text2UnicodeConverter> m_pText2Unicode; + std::unique_ptr<Unicode2TextConverter> m_pUnicode2Text; +}; +} + +#endif // INCLUDED_SAX_INC_XML2UTF_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/qa/cppunit/attributes.cxx b/sax/qa/cppunit/attributes.cxx new file mode 100644 index 0000000000..899716d62f --- /dev/null +++ b/sax/qa/cppunit/attributes.cxx @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/types.h> +#include <com/sun/star/xml/sax/SAXException.hpp> +#include <cppunit/TestFixture.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> + +#include <rtl/ref.hxx> +#include <sax/fastattribs.hxx> + +using namespace css; +using namespace css::xml; + +namespace { + +class AttributesTest: public CppUnit::TestFixture +{ +public: + void test(); + + CPPUNIT_TEST_SUITE( AttributesTest ); + CPPUNIT_TEST( test ); + CPPUNIT_TEST_SUITE_END(); +}; + +void AttributesTest::test() +{ + rtl::Reference<sax_fastparser::FastAttributeList> xAttributeList( new sax_fastparser::FastAttributeList(nullptr) ); + xAttributeList->add(1, "1"); + xAttributeList->add(2, OString::Concat("2")); + xAttributeList->add(3, u"3"); + xAttributeList->add(4, OUString::Concat("4")); + + // We can't test getValueToken() and getOptionalValueToken() + // without XFastTokenHandler :-( + // Uncomment to get segmentation fault: + // xAttributeList->getOptionalValueToken(1, 0); + // xAttributeList->getValueToken(2); + + CPPUNIT_ASSERT( xAttributeList->hasAttribute(1) ); + CPPUNIT_ASSERT( !xAttributeList->hasAttribute(5) ); + + CPPUNIT_ASSERT_EQUAL( OUString("2"), xAttributeList->getOptionalValue(2) ); + CPPUNIT_ASSERT_EQUAL( OUString("3"), xAttributeList->getOptionalValue(3) ); + CPPUNIT_ASSERT_EQUAL( OUString("4"), xAttributeList->getOptionalValue(4) ); + CPPUNIT_ASSERT_EQUAL( OUString(), xAttributeList->getOptionalValue(5) ); + + CPPUNIT_ASSERT_EQUAL( OUString("1"), xAttributeList->getValue(1) ); + CPPUNIT_ASSERT_THROW( xAttributeList->getValue(5), xml::sax::SAXException ); + + xAttributeList->addUnknown("a"_ostr, "a"_ostr); + xAttributeList->addUnknown("b", "b"_ostr, "b"_ostr); + xAttributeList->addUnknown("c"_ostr, "c"_ostr); + 
CPPUNIT_ASSERT_EQUAL( sal_Int32(3), xAttributeList->getUnknownAttributes().getLength() ); + + CPPUNIT_ASSERT_EQUAL( sal_Int32(4), xAttributeList->getFastAttributes().getLength() ); + + xAttributeList->clear(); + CPPUNIT_ASSERT( !xAttributeList->hasAttribute(1) ); + CPPUNIT_ASSERT( !xAttributeList->getFastAttributes().hasElements() ); + xAttributeList->addUnknown("c"_ostr, "c"_ostr); + CPPUNIT_ASSERT_EQUAL( sal_Int32(1), xAttributeList->getUnknownAttributes().getLength() ); +} + +CPPUNIT_TEST_SUITE_REGISTRATION( AttributesTest ); + +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/qa/cppunit/parser.cxx b/sax/qa/cppunit/parser.cxx new file mode 100644 index 0000000000..670c1afa92 --- /dev/null +++ b/sax/qa/cppunit/parser.cxx @@ -0,0 +1,99 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <sal/config.h> + +#include <com/sun/star/io/Pipe.hpp> +#include <com/sun/star/xml/sax/FastToken.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> + +#include <sax/fastparser.hxx> +#include <sax/fastattribs.hxx> +#include <test/bootstrapfixture.hxx> +#include <rtl/ref.hxx> + +using namespace css; +using namespace css::xml::sax; + +namespace { + +class DummyTokenHandler : public sax_fastparser::FastTokenHandlerBase +{ +public: + DummyTokenHandler() {} + + virtual sal_Int32 SAL_CALL getTokenFromUTF8( const uno::Sequence<sal_Int8>& ) override + { + return FastToken::DONTKNOW; + } + virtual uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 ) override + { + CPPUNIT_ASSERT_MESSAGE( "getUTF8Identifier: unexpected call", false ); + return uno::Sequence<sal_Int8>(); + } + virtual sal_Int32 getTokenDirect( const char * /* pToken */, sal_Int32 /* nLength */ ) const override + { + return -1; + } +}; + +class ParserTest: public test::BootstrapFixture +{ + InputSource maInput; + rtl::Reference< sax_fastparser::FastSaxParser > mxParser; + rtl::Reference< DummyTokenHandler > mxTokenHandler; + +public: + virtual void setUp() override; + + void parse(); + + CPPUNIT_TEST_SUITE(ParserTest); + CPPUNIT_TEST(parse); + CPPUNIT_TEST_SUITE_END(); + +private: + uno::Reference< io::XInputStream > createStream(const OString& sInput); +}; + +void ParserTest::setUp() +{ + test::BootstrapFixture::setUp(); + mxTokenHandler.set( new DummyTokenHandler() ); + mxParser.set( new sax_fastparser::FastSaxParser() ); + mxParser->setTokenHandler( mxTokenHandler ); +} + +uno::Reference< io::XInputStream > ParserTest::createStream(const OString& sInput) +{ + uno::Reference< io::XOutputStream > xPipe( io::Pipe::create(m_xContext) ); + uno::Reference< io::XInputStream > xInStream( xPipe, uno::UNO_QUERY ); + uno::Sequence< sal_Int8 > aSeq( reinterpret_cast<sal_Int8 const *>(sInput.getStr()), sInput.getLength() ); + xPipe->writeBytes( aSeq ); + xPipe->flush(); + 
xPipe->closeOutput(); + return xInStream; +} + +void ParserTest::parse() +{ + maInput.aInputStream = createStream("<a>...<b />..</a>"_ostr); + mxParser->parseStream( maInput ); + + maInput.aInputStream = createStream("<b></a>"_ostr); + CPPUNIT_ASSERT_THROW( mxParser->parseStream( maInput ), css::xml::sax::SAXParseException ); +} + +CPPUNIT_TEST_SUITE_REGISTRATION(ParserTest); + +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/qa/cppunit/test_converter.cxx b/sax/qa/cppunit/test_converter.cxx new file mode 100644 index 0000000000..525e110c1a --- /dev/null +++ b/sax/qa/cppunit/test_converter.cxx @@ -0,0 +1,623 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <limits> + +#include <sal/types.h> +#include <cppunit/TestAssert.h> +#include <cppunit/TestFixture.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> + +#include <rtl/ustrbuf.hxx> + +#include <com/sun/star/util/DateTime.hpp> +#include <com/sun/star/util/Duration.hpp> +#include <com/sun/star/util/MeasureUnit.hpp> + +#include <sax/tools/converter.hxx> +#include <sal/log.hxx> + + +using namespace ::com::sun::star; +using namespace ::com::sun::star::util; +using sax::Converter; + + +namespace { + +class ConverterTest + : public ::CppUnit::TestFixture +{ +public: + + void testDuration(); + void testDateTime(); + void testTime(); + void testDouble(); + void testMeasure(); + void testBool(); + void testPercent(); + void testColor(); + void testNumber(); + + CPPUNIT_TEST_SUITE(ConverterTest); + CPPUNIT_TEST(testDuration); + CPPUNIT_TEST(testDateTime); + CPPUNIT_TEST(testTime); + CPPUNIT_TEST(testDouble); + CPPUNIT_TEST(testMeasure); + CPPUNIT_TEST(testBool); + CPPUNIT_TEST(testPercent); + CPPUNIT_TEST(testColor); + CPPUNIT_TEST(testNumber); + CPPUNIT_TEST_SUITE_END(); + +private: +}; + +void doTest(util::Duration const & rid, char const*const pis, + char const*const i_pos = nullptr) +{ + char const*const pos(i_pos ? i_pos : pis); + util::Duration od; + OUString is(OUString::createFromAscii(pis)); + SAL_INFO("sax.cppunit","about to convert '" << is << "'"); + bool bSuccess = Converter::convertDuration(od, is); + SAL_INFO("sax.cppunit","" << (od.Negative ? 
"-" : "+") << " " << od.Years << "Y " << od.Months << "M " << od.Days << "D " << od.Hours << "H " << od.Minutes << "M " << od.Seconds << "S " << od.NanoSeconds << "n"); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_EQUAL(rid.Years, od.Years); + CPPUNIT_ASSERT_EQUAL(rid.Months, od.Months); + CPPUNIT_ASSERT_EQUAL(rid.Days, od.Days); + CPPUNIT_ASSERT_EQUAL(rid.Hours, od.Hours); + CPPUNIT_ASSERT_EQUAL(rid.Minutes, od.Minutes); + CPPUNIT_ASSERT_EQUAL(rid.Seconds, od.Seconds); + CPPUNIT_ASSERT_EQUAL(rid.NanoSeconds, od.NanoSeconds); + CPPUNIT_ASSERT_EQUAL(rid.Negative, od.Negative); + OUStringBuffer buf(64); + Converter::convertDuration(buf, od); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT(buf.makeStringAndClear().equalsAscii(pos)); +} + +void doTestDurationF(char const*const pis) +{ + util::Duration od; + bool bSuccess = Converter::convertDuration(od, + OUString::createFromAscii(pis)); + SAL_INFO("sax.cppunit","" << (od.Negative ? "-" : "+") << " " << od.Years << "Y " << od.Months << "M " << od.Days << "D " << od.Hours << "H " << od.Minutes << "M " << od.Seconds << "S " << od.NanoSeconds << "n"); + CPPUNIT_ASSERT_MESSAGE(pis, !bSuccess); +} + +void ConverterTest::testDuration() +{ + SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST BEGIN"); + doTest( util::Duration(false, 1, 0, 0, 0, 0, 0, 0), "P1Y" ); + doTest( util::Duration(false, 0, 42, 0, 0, 0, 0, 0), "P42M" ); + doTest( util::Duration(false, 0, 0, 111, 0, 0, 0, 0), "P111D" ); + doTest( util::Duration(false, 0, 0, 0, 52, 0, 0, 0), "PT52H" ); + doTest( util::Duration(false, 0, 0, 0, 0, 717, 0, 0), "PT717M" ); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 121, 0), "PT121S" ); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 190000000), "PT0.19S", "PT0.190000000S"); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 90000000), "PT0.09S", "PT0.090000000S" ); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 9000000), "PT0.009S", "PT0.009000000S" ); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 9), 
"PT0.000000009S", "PT0.000000009S" ); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 9, 999999999), + "PT9.999999999999999999999999999999S", "PT9.999999999S" ); + doTest( util::Duration(true , 0, 0, 9999, 0, 0, 0, 0), "-P9999D" ); + doTest( util::Duration(true , 7, 6, 5, 4, 3, 2, 10000000), + "-P7Y6M5DT4H3M2.01000S", "-P7Y6M5DT4H3M2.010000000S" ); + doTest( util::Duration(false, 0, 6, 0, 0, 3, 0, 0), "P6MT3M" ); + doTest( util::Duration(false, 0, 0, 0, 0, 0, 0, 0), "P0D" ); + doTestDurationF("1Y1M"); // invalid: no ^P + doTestDurationF("P-1Y1M"); // invalid: - after P + doTestDurationF("P1M1Y"); // invalid: Y after M + doTestDurationF("PT1Y"); // invalid: Y after T + doTestDurationF("P1Y1M1M"); // invalid: M twice, no T + doTestDurationF("P1YT1MT1M"); // invalid: T twice + doTestDurationF("P1YT"); // invalid: T but no H,M,S + doTestDurationF("P99999999999Y"); // cannot parse so many Ys + doTestDurationF("PT.1S"); // invalid: no 0 preceding . + doTestDurationF("PT5M.134S"); // invalid: no 0 preceding . + doTestDurationF("PT1.S"); // invalid: no digit following . + SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST END"); +} + + +bool eqDateTime(const util::DateTime& a, const util::DateTime& b) { + return a.Year == b.Year && a.Month == b.Month && a.Day == b.Day + && a.Hours == b.Hours && a.Minutes == b.Minutes + && a.Seconds == b.Seconds + && a.NanoSeconds == b.NanoSeconds + && a.IsUTC == b.IsUTC; +} + +void doTest(util::DateTime const & rdt, char const*const pis, + char const*const i_pos = nullptr) +{ + char const*const pos(i_pos ? 
i_pos : pis); + OUString is(OUString::createFromAscii(pis)); + util::DateTime odt; + SAL_INFO("sax.cppunit","about to convert '" << is << "'"); + bool bSuccess( Converter::parseDateTime(odt, is) ); + SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << " M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds << " UTC: " << static_cast<bool>(odt.IsUTC)); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT(eqDateTime(rdt, odt)); + OUStringBuffer buf(32); + Converter::convertDateTime(buf, odt, nullptr, true); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pos), + buf.makeStringAndClear()); +} + +void doTestDateTimeF(char const*const pis) +{ + util::DateTime odt; + bool bSuccess = Converter::parseDateTime(odt, OUString::createFromAscii(pis)); + SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << "H M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds); + CPPUNIT_ASSERT(!bSuccess); +} + +void ConverterTest::testDateTime() +{ + SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST BEGIN"); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, false), "0001-01-01T00:00:00" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), "0001-01-01T00:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1, false), + "-0001-01-01T00:00:00"); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1, true), + "-0001-01-01T01:00:00+01:00", "-0001-01-01T00:00:00Z"); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, -324, false), + "-0324-01-01T00:00:00" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), + "0001-01-01T00:00:00-00:00", "0001-01-01T00:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), + "0001-01-01T00:00:00+00:00", "0001-01-01T00:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 12, 2, 1, 1, true), + "0001-01-02T00:00:00-12:00", "0001-01-02T12:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 12, 1, 1, 1, true), 
+ "0001-01-02T00:00:00+12:00", "0001-01-01T12:00:00Z" ); + doTest( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, false), + "9999-12-31T23:59:59.99", "9999-12-31T23:59:59.990000000" ); + doTest( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, true), + "9999-12-31T23:59:59.99Z", "9999-12-31T23:59:59.990000000Z" ); + doTest( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, false), + "9999-12-31T23:59:59.9999999999999999999999999999999999999", + "9999-12-31T23:59:59.999999999" ); + doTest( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, true), + "9999-12-31T23:59:59.9999999999999999999999999999999999999Z", + "9999-12-31T23:59:59.999999999Z" ); + doTest( util::DateTime(0, 0, 0, 0, 29, 2, 2000, true), // leap year + "2000-02-29T00:00:00-00:00", "2000-02-29T00:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 0, 29, 2, 1600, true), // leap year + "1600-02-29T00:00:00-00:00", "1600-02-29T00:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 24, 1, 1, 333, false) + /*(0, 0, 0, 0, 2, 1, 333)*/, + "0333-01-01T24:00:00"/*, "0333-01-02T00:00:00"*/ ); + // While W3C XMLSchema specifies a minimum of 4 year digits we are lenient + // in what we accept. + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1, false), + "1-01-01T00:00:00", "0001-01-01T00:00:00" ); + doTestDateTimeF( "+0001-01-01T00:00:00" ); // invalid: ^+ + doTestDateTimeF( "0001-1-01T00:00:00" ); // invalid: < 2 M + doTestDateTimeF( "0001-01-1T00:00:00" ); // invalid: < 2 D + doTestDateTimeF( "0001-01-01T0:00:00" ); // invalid: < 2 H + doTestDateTimeF( "0001-01-01T00:0:00" ); // invalid: < 2 M + doTestDateTimeF( "0001-01-01T00:00:0" ); // invalid: < 2 S + doTestDateTimeF( "0001-01-01T00:00:00." 
); // invalid: .$ + doTestDateTimeF( "0001-01-01T00:00:00+1:00" ); // invalid: < 2 TZ H + doTestDateTimeF( "0001-01-01T00:00:00+00:1" ); // invalid: < 2 TZ M + doTestDateTimeF( "0001-13-01T00:00:00" ); // invalid: M > 12 + doTestDateTimeF( "0001-01-32T00:00:00" ); // invalid: D > 31 + doTestDateTimeF( "0001-01-01T25:00:00" ); // invalid: H > 24 + doTestDateTimeF( "0001-01-01T00:60:00" ); // invalid: M > 59 + doTestDateTimeF( "0001-01-01T00:00:60" ); // invalid: S > 59 + doTestDateTimeF( "0001-01-01T24:01:00" ); // invalid: H=24, but M != 0 + doTestDateTimeF( "0001-01-01T24:00:01" ); // invalid: H=24, but S != 0 + doTestDateTimeF( "0001-01-01T24:00:00.1" ); // invalid: H=24, but nS != 0 + doTestDateTimeF( "0001-01-02T00:00:00+15:00" ); // invalid: TZ > +14:00 + doTestDateTimeF( "0001-01-02T00:00:00+14:01" ); // invalid: TZ > +14:00 + doTestDateTimeF( "0001-01-02T00:00:00-15:00" ); // invalid: TZ < -14:00 + doTestDateTimeF( "0001-01-02T00:00:00-14:01" ); // invalid: TZ < -14:00 + doTestDateTimeF( "2100-02-29T00:00:00-00:00" ); // invalid: no leap year + doTestDateTimeF( "1900-02-29T00:00:00-00:00" ); // invalid: no leap year + doTestDateTimeF( "00:00:00" ); // invalid: no date + doTestDateTimeF( "T00:00:00" ); // invalid: no date + SAL_INFO("sax.cppunit","\nSAX CONVERTER TEST END"); +} + +void doTestTime(util::DateTime const & rdt, char const*const pis, + char const*const i_pos = nullptr) +{ + char const*const pos(i_pos ? 
i_pos : pis); + OUString is(OUString::createFromAscii(pis)); + util::DateTime odt; + SAL_INFO("sax.cppunit","about to convert '" << is << "'"); + bool bSuccess( Converter::parseTimeOrDateTime(odt, is) ); + SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << " M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds << " UTC: " << static_cast<bool>(odt.IsUTC)); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT(eqDateTime(rdt, odt)); + OUStringBuffer buf(32); + Converter::convertTimeOrDateTime(buf, odt); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(OUString::createFromAscii(pos), + buf.makeStringAndClear()); +} + +void doTestTimeF(char const*const pis) +{ + util::DateTime odt; + bool bSuccess = Converter::parseTimeOrDateTime(odt, OUString::createFromAscii(pis)); + SAL_INFO("sax.cppunit","Y:" << odt.Year << " M:" << odt.Month << " D:" << odt.Day << " H:" << odt.Hours << "H M:" << odt.Minutes << " S:" << odt.Seconds << " nS:" << odt.NanoSeconds); + CPPUNIT_ASSERT_MESSAGE(pis, !bSuccess); +} + +void ConverterTest::testTime() // time or dateTime + horrible backcompat mess +{ + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, false), + "0001-01-01T00:00:00" ); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, false), + "0001-01-01T00:00:00" ); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), + "0001-01-01T00:00:00Z" ); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, -1, false), + "-0001-01-01T00:00:00"); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, -1, true), + "-0001-01-01T01:00:00+01:00", "-0001-01-01T00:00:00Z"); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, -324, false), + "-0324-01-01T00:00:00" ); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), + "0001-01-01T00:00:00-00:00", "0001-01-01T00:00:00Z" ); + doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, true), + "0001-01-01T00:00:00+00:00", "0001-01-01T00:00:00Z" ); + doTestTime( util::DateTime(0, 0, 0, 12, 2, 1, 
1, true), + "0001-01-02T00:00:00-12:00", "0001-01-02T12:00:00Z" ); + doTestTime( util::DateTime(0, 0, 0, 12, 1, 1, 1, true), + "0001-01-02T00:00:00+12:00", "0001-01-01T12:00:00Z" ); + doTestTime( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, false), + "9999-12-31T23:59:59.99", "9999-12-31T23:59:59.990000000" ); + doTestTime( util::DateTime(990000000, 59, 59, 23, 31, 12, 9999, true), + "9999-12-31T23:59:59.99Z", "9999-12-31T23:59:59.990000000Z" ); + doTestTime( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, false), + "9999-12-31T23:59:59.9999999999999999999999999999999999999", + "9999-12-31T23:59:59.999999999" ); + doTestTime( util::DateTime(999999999, 59, 59, 23, 31, 12, 9999, true), + "9999-12-31T23:59:59.9999999999999999999999999999999999999Z", + "9999-12-31T23:59:59.999999999Z" ); + doTestTime( util::DateTime(0, 0, 0, 0, 29, 2, 2000, true), // leap year + "2000-02-29T00:00:00-00:00", "2000-02-29T00:00:00Z" ); + doTestTime( util::DateTime(0, 0, 0, 0, 29, 2, 1600, true), // leap year + "1600-02-29T00:00:00-00:00", "1600-02-29T00:00:00Z" ); + doTestTime( util::DateTime(0, 0, 0, 24, 1, 1, 333, false) + /*(0, 0, 0, 0, 2, 1, 333)*/, + "0333-01-01T24:00:00"/*, "0333-01-02T00:00:00"*/ ); + // While W3C XMLSchema specifies a minimum of 4 year digits we are lenient + // in what we accept. 
+ doTestTime( util::DateTime(0, 0, 0, 0, 1, 1, 1, false), + "1-01-01T00:00:00", "0001-01-01T00:00:00" ); + + doTestTime( util::DateTime(0, 0, 0, 0, 0, 0, 0, false), "00:00:00" ); + doTestTime( util::DateTime(0, 0, 0, 24, 0, 0, 0, false), "24:00:00" ); + doTestTime( util::DateTime(0, 0, 59, 0, 0, 0, 0, false), "00:59:00" ); + doTestTime( util::DateTime(0, 1, 2, 4, 0, 0, 0, true), "04:02:01Z" ); + doTestTime( util::DateTime(0, 1, 2, 4, 0, 0, 0, true), + "05:02:01+01:00", "04:02:01Z" ); + doTestTime( util::DateTime(0, 11, 12, 9, 0, 0, 0, true), + "05:12:11-04:00", "09:12:11Z" ); + doTestTime( util::DateTime(990000000, 59, 59, 23, 0, 0, 0, false), + "23:59:59.99", "23:59:59.990000000" ); + doTestTime( util::DateTime(990000000, 59, 59, 23, 0, 0, 0, true), + "23:59:59.99Z", "23:59:59.990000000Z" ); + // backwards compatible: recognize invalid 0000-00-00 date (LO 3.5) + doTestTime( util::DateTime(0, 1, 0, 0, 0, 0, 0, false), + "0000-00-00T00:00:01", "00:00:01" ); + // backwards compatible: recognize invalid 0-00-00 date (OOo) + doTestTime( util::DateTime(0, 0, 1, 0, 0, 0, 0, false), + "0-00-00T00:01:00", "00:01:00" ); + + doTestTimeF( "+0001-01-01T00:00:00" ); // invalid: ^+ + doTestTimeF( "0001-1-01T00:00:00" ); // invalid: < 2 M + doTestTimeF( "0001-01-1T00:00:00" ); // invalid: < 2 D + doTestTimeF( "0001-01-01T0:00:00" ); // invalid: < 2 H + doTestTimeF( "0001-01-01T00:0:00" ); // invalid: < 2 M + doTestTimeF( "0001-01-01T00:00:0" ); // invalid: < 2 S + doTestTimeF( "0001-01-01T00:00:00." 
); // invalid: .$ + doTestTimeF( "0001-01-01T00:00:00+1:00" ); // invalid: < 2 TZ H + doTestTimeF( "0001-01-01T00:00:00+00:1" ); // invalid: < 2 TZ M + doTestTimeF( "0001-13-01T00:00:00" ); // invalid: M > 12 + doTestTimeF( "0001-01-32T00:00:00" ); // invalid: D > 31 + doTestTimeF( "0001-01-01T25:00:00" ); // invalid: H > 24 + doTestTimeF( "0001-01-01T00:60:00" ); // invalid: M > 59 + doTestTimeF( "0001-01-01T00:00:60" ); // invalid: S > 59 + doTestTimeF( "0001-01-01T24:01:00" ); // invalid: H=24, but M != 0 + doTestTimeF( "0001-01-01T24:00:01" ); // invalid: H=24, but S != 0 + doTestTimeF( "0001-01-01T24:00:00.1" ); // invalid: H=24, but H != 0 + doTestTimeF( "0001-01-02T00:00:00+15:00" ); // invalid: TZ > +14:00 + doTestTimeF( "0001-01-02T00:00:00+14:01" ); // invalid: TZ > +14:00 + doTestTimeF( "0001-01-02T00:00:00-15:00" ); // invalid: TZ < -14:00 + doTestTimeF( "0001-01-02T00:00:00-14:01" ); // invalid: TZ < -14:00 + doTestTimeF( "2100-02-29T00:00:00-00:00" ); // invalid: no leap year + doTestTimeF( "1900-02-29T00:00:00-00:00" ); // invalid: no leap year + doTestTimeF( "T00:00:00" ); // invalid: T + doTestTimeF( "0:00:00" ); // invalid: < 2 H + doTestTimeF( "00:0:00" ); // invalid: < 2 M + doTestTimeF( "00:00:0" ); // invalid: < 2 S + doTestTimeF( "00:00:00." 
); // invalid: .$ + doTestTimeF( "00:00:00+1:00" ); // invalid: < 2 TZ H + doTestTimeF( "00:00:00+00:1" ); // invalid: < 2 TZ M + doTestTimeF( "25:00:00" ); // invalid: H > 24 + doTestTimeF( "00:60:00" ); // invalid: M > 59 + doTestTimeF( "00:00:60" ); // invalid: S > 59 + doTestTimeF( "24:01:00" ); // invalid: H=24, but M != 0 + doTestTimeF( "24:00:01" ); // invalid: H=24, but S != 0 + doTestTimeF( "24:00:00.1" ); // invalid: H=24, but H != 0 + doTestTimeF( "00:00:00+15:00" ); // invalid: TZ > +14:00 + doTestTimeF( "00:00:00+14:01" ); // invalid: TZ > +14:00 + doTestTimeF( "00:00:00-15:00" ); // invalid: TZ < -14:00 + doTestTimeF( "00:00:00-14:01" ); // invalid: TZ < -14:00 +} + +void doTestDouble(char const*const pis, double const rd, + sal_Int16 const nSourceUnit, sal_Int16 const nTargetUnit) +{ + OUString const is(OUString::createFromAscii(pis)); + double od; + bool bSuccess(Converter::convertDouble(od, is, nSourceUnit, nTargetUnit)); + SAL_INFO("sax.cppunit","" << od); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_DOUBLES_EQUAL(rd, od, 0.00000001); + OUStringBuffer buf; + Converter::convertDouble(buf, od, true, nTargetUnit, nSourceUnit); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(is, buf.makeStringAndClear()); +} + +void ConverterTest::testDouble() +{ + doTestDouble("42", 42.0, MeasureUnit::TWIP, MeasureUnit::TWIP); + doTestDouble("42", 42.0, MeasureUnit::POINT, MeasureUnit::POINT); + doTestDouble("42", 42.0, MeasureUnit::MM_100TH, MeasureUnit::MM_100TH); + doTestDouble("42", 42.0, MeasureUnit::MM_10TH, MeasureUnit::MM_10TH); + doTestDouble("42", 42.0, MeasureUnit::MM, MeasureUnit::MM); // identity don't seem to add unit? 
+ doTestDouble("42", 42.0, MeasureUnit::CM, MeasureUnit::CM); + doTestDouble("42", 42.0, MeasureUnit::INCH, MeasureUnit::INCH); + doTestDouble("2pt", 40.0, MeasureUnit::POINT, MeasureUnit::TWIP); + doTestDouble("20pc", 1, MeasureUnit::TWIP, MeasureUnit::POINT); + doTestDouble("4", 2.26771653543307, MeasureUnit::MM_100TH, MeasureUnit::TWIP); + doTestDouble("4", 22.6771653543307, MeasureUnit::MM_10TH, MeasureUnit::TWIP); + doTestDouble("4mm", 226.771653543307, MeasureUnit::MM, MeasureUnit::TWIP); + doTestDouble("4cm", 2267.71653543307, MeasureUnit::CM, MeasureUnit::TWIP); + doTestDouble("4in", 5760.0, MeasureUnit::INCH, MeasureUnit::TWIP); + doTestDouble("1440pc", 1.0, MeasureUnit::TWIP, MeasureUnit::INCH); + doTestDouble("567pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::CM); + doTestDouble("56.7pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::MM); + doTestDouble("5.67pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::MM_10TH); + doTestDouble("0.567pc", 1.000125, MeasureUnit::TWIP, MeasureUnit::MM_100TH); + doTestDouble("42pt", 1.4816666666666, MeasureUnit::POINT, MeasureUnit::CM); + doTestDouble("42pt", 14.816666666666, MeasureUnit::POINT, MeasureUnit::MM); + doTestDouble("42pt", 148.16666666666, MeasureUnit::POINT, MeasureUnit::MM_10TH); + doTestDouble("42pt", 1481.6666666666, MeasureUnit::POINT, MeasureUnit::MM_100TH); + doTestDouble("72pt", 1.0, MeasureUnit::POINT, MeasureUnit::INCH); + doTestDouble("3.5in", 8.89, MeasureUnit::INCH, MeasureUnit::CM); + doTestDouble("3.5in", 88.9, MeasureUnit::INCH, MeasureUnit::MM); + doTestDouble("3.5in", 889.0, MeasureUnit::INCH, MeasureUnit::MM_10TH); + doTestDouble("3.5in", 8890.0, MeasureUnit::INCH, MeasureUnit::MM_100TH); + doTestDouble("2in", 144, MeasureUnit::INCH, MeasureUnit::POINT); + doTestDouble("5.08cm", 2.0, MeasureUnit::CM, MeasureUnit::INCH); + doTestDouble("3.5cm", 3500.0, MeasureUnit::CM, MeasureUnit::MM_100TH); + doTestDouble("3.5cm", 350.0, MeasureUnit::CM, MeasureUnit::MM_10TH); + doTestDouble("3.5cm", 35.0, 
MeasureUnit::CM, MeasureUnit::MM); + doTestDouble("10cm", 283.464566929134, MeasureUnit::CM, MeasureUnit::POINT); + doTestDouble("0.5cm", 283.464566929134, MeasureUnit::CM, MeasureUnit::TWIP); + doTestDouble("10mm", 28.3464566929134, MeasureUnit::MM, MeasureUnit::POINT); + doTestDouble("0.5mm", 28.3464566929134, MeasureUnit::MM, MeasureUnit::TWIP); + doTestDouble("10", 2.83464566929134, MeasureUnit::MM_10TH, MeasureUnit::POINT); + doTestDouble("0.5", 2.83464566929134, MeasureUnit::MM_10TH, MeasureUnit::TWIP); + doTestDouble("10", 0.283464566929134, MeasureUnit::MM_100TH, MeasureUnit::POINT); + doTestDouble("0.5", 0.283464566929134, MeasureUnit::MM_100TH, MeasureUnit::TWIP); + doTestDouble("10mm", 1.0, MeasureUnit::MM, MeasureUnit::CM); + doTestDouble("10mm", 100.0, MeasureUnit::MM, MeasureUnit::MM_10TH); + doTestDouble("20mm", 2000.0, MeasureUnit::MM, MeasureUnit::MM_100TH); + doTestDouble("300", 30.0, MeasureUnit::MM_10TH, MeasureUnit::MM); + doTestDouble("400", 4.0, MeasureUnit::MM_100TH, MeasureUnit::MM); + doTestDouble("600", 6000.0, MeasureUnit::MM_10TH, MeasureUnit::MM_100TH); + doTestDouble("700", 70.0, MeasureUnit::MM_100TH, MeasureUnit::MM_10TH); +} + +void doTestStringToMeasure(sal_Int32 rValue, char const*const pis, sal_Int16 nTargetUnit, sal_Int32 nMin, sal_Int32 nMax) +{ + OUString const is(OUString::createFromAscii(pis)); + sal_Int32 nVal; + bool bSuccess(Converter::convertMeasure(nVal, is, nTargetUnit, nMin, nMax)); + SAL_INFO("sax.cppunit","" << nVal); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_EQUAL(rValue, nVal); +} + +void doTestMeasureToString(char const*const pis, sal_Int32 nMeasure, sal_Int16 const nSourceUnit, sal_Int16 const nTargetUnit) +{ + OUString const is(OUString::createFromAscii(pis)); + OUStringBuffer buf; + Converter::convertMeasure(buf, nMeasure, nSourceUnit, nTargetUnit); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(is, buf.makeStringAndClear()); +} + +void ConverterTest::testMeasure() +{ + //check all 
the measure units + doTestStringToMeasure(1000, "10mm", MeasureUnit::MM_100TH, -1, 4321); + doTestStringToMeasure(200, "20mm", MeasureUnit::MM_10TH, 12, 4567); + doTestStringToMeasure(300, "300", MeasureUnit::MM, 31, 555); + doTestStringToMeasure(400, "400", MeasureUnit::CM, 10, 4321); + doTestStringToMeasure(120, "120", MeasureUnit::INCH_1000TH, 10, 4321); + doTestStringToMeasure(111, "111", MeasureUnit::INCH_100TH, 10, 4321); + doTestStringToMeasure(22, "22", MeasureUnit::INCH_10TH, 10, 4321); + doTestStringToMeasure(27, "27", MeasureUnit::INCH, 10, 4321); + doTestStringToMeasure(52, "52", MeasureUnit::POINT, 10, 4321); + doTestStringToMeasure(120, "120", MeasureUnit::TWIP, 10, 4321); + doTestStringToMeasure(666, "666", MeasureUnit::M, 10, 4321); + doTestStringToMeasure(42, "42", MeasureUnit::KM, 10, 4321); + doTestStringToMeasure(30, "30", MeasureUnit::PICA, 10, 4321); + doTestStringToMeasure(20, "20", MeasureUnit::FOOT, 10, 4321); + doTestStringToMeasure(40, "40", MeasureUnit::MILE, 10, 4321); + doTestStringToMeasure(40, "40%", MeasureUnit::PERCENT, 10, 4321); + doTestStringToMeasure(800, "800", MeasureUnit::PIXEL, 10, 4321); + doTestStringToMeasure(600, "600px", MeasureUnit::PIXEL, 10, 4321); + doTestStringToMeasure(777, "777", MeasureUnit::APPFONT, 10, 4321); + doTestStringToMeasure(80000, "80000", MeasureUnit::SYSFONT, 10, 432100); + //strange values (negative, too large etc.) + doTestStringToMeasure(555, "666", MeasureUnit::MM, -1000, 555); + doTestStringToMeasure(-1000, "-1001", MeasureUnit::MM, -1000, 555); + doTestStringToMeasure(0, "-0", MeasureUnit::MM, -1, 0); + doTestStringToMeasure(::std::numeric_limits<sal_Int32>::max(), "1234567890mm", MeasureUnit::MM_10TH, 12, ::std::numeric_limits<sal_Int32>::max()); + doTestStringToMeasure(-300, "-300", MeasureUnit::MM, -1000, 555); + doTestStringToMeasure(::std::numeric_limits<sal_Int32>::min(), "-999999999999999px", MeasureUnit::PIXEL, ::std::numeric_limits<sal_Int32>::min(), 555); //really crazy numbers... 
+ + doTestMeasureToString("6mm", 600, MeasureUnit::MM_100TH, MeasureUnit::MM); + doTestMeasureToString("0.005cm", 000000005, MeasureUnit::MM_100TH, MeasureUnit::CM); // zeros in the front doesn't count + doTestMeasureToString("3mm", 30, MeasureUnit::MM_10TH, MeasureUnit::MM); + doTestMeasureToString("6.66cm", 666, MeasureUnit::MM_10TH, MeasureUnit::CM); + doTestMeasureToString("-157.3pt", -555, MeasureUnit::MM_10TH, MeasureUnit::POINT); + doTestMeasureToString("174976.378in", 44444000, MeasureUnit::MM_10TH, MeasureUnit::INCH); //let's check accuracy + doTestMeasureToString("40%", 40, MeasureUnit::PERCENT, MeasureUnit::PERCENT); + doTestMeasureToString("70.56mm", 4000, MeasureUnit::TWIP, MeasureUnit::MM); + doTestMeasureToString("979.928cm", 555550, MeasureUnit::TWIP, MeasureUnit::CM); + doTestMeasureToString("111.1pt", 2222, MeasureUnit::TWIP, MeasureUnit::POINT); + doTestMeasureToString("385.7986in", 555550, MeasureUnit::TWIP, MeasureUnit::INCH); + doTestMeasureToString("-2147483.648cm", std::numeric_limits<sal_Int32>::min(), MeasureUnit::MM_100TH, MeasureUnit::CM); +} + +void doTestStringToBool(bool bBool, char const*const pis) +{ + OUString const is(OUString::createFromAscii(pis)); + bool bTemp; + bool bSuccess(Converter::convertBool(bTemp, is)); + SAL_INFO("sax.cppunit","" << bTemp); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_EQUAL(bBool, bTemp); + +} + +void doTestBoolToString(char const*const pis, bool bValue ) +{ + OUString const is(OUString::createFromAscii(pis)); + OUStringBuffer buf; + Converter::convertBool(buf, bValue); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(is, buf.makeStringAndClear()); +} + +void ConverterTest::testBool() +{ + doTestStringToBool(true, "true"); + doTestStringToBool(false, "false"); + doTestBoolToString("true", true); + doTestBoolToString("false", false); +} + +void doTestStringToPercent(sal_Int32 nValue, char const*const pis) +{ + OUString const is(OUString::createFromAscii(pis)); + sal_Int32 nTemp; + 
bool bSuccess(Converter::convertPercent(nTemp, is)); + SAL_INFO("sax.cppunit","" << nTemp); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_EQUAL(nValue, nTemp); +} + +void doTestPercentToString(char const*const pis, sal_Int32 nValue) +{ + OUString const is(OUString::createFromAscii(pis)); + OUStringBuffer buf; + Converter::convertPercent(buf, nValue); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(is, buf.makeStringAndClear()); +} + +void ConverterTest::testPercent() +{ + doTestStringToPercent(40, "40%"); + doTestStringToPercent(30, "30"); + doTestStringToPercent(120, "120%"); + doTestStringToPercent(-40, "-40%"); + doTestStringToPercent(0, "0%"); + doTestPercentToString("12%", 12); + doTestPercentToString("-123%", -123); + doTestPercentToString("0%", 0); + doTestPercentToString("1%", 00001); +} + +void doTestStringToColor(sal_Int32 nValue, char const*const pis) +{ + OUString const is(OUString::createFromAscii(pis)); + sal_Int32 nTemp; + bool bSuccess(Converter::convertColor(nTemp, is)); + SAL_INFO("sax.cppunit","" << nTemp); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_EQUAL(nValue, nTemp); +} + +void doTestColorToString(char const*const pis, sal_Int32 nValue) +{ + OUString const is(OUString::createFromAscii(pis)); + OUStringBuffer buf; + Converter::convertColor(buf, nValue); + SAL_INFO("sax.cppunit","" << buf.toString()); + CPPUNIT_ASSERT_EQUAL(is, buf.makeStringAndClear()); +} + +void ConverterTest::testColor() +{ + doTestStringToColor(11259375, "#abcdef"); + doTestStringToColor(160, "#0000a0"); + doTestStringToColor(40960, "#00a000"); + doTestStringToColor(0, "#000000"); + doTestColorToString("#000615", 1557); + doTestColorToString("#5bcd15", 123456789); + doTestColorToString("#fffac7", -1337); + doTestColorToString("#000000", 0); +} + +void doTestStringToNumber(sal_Int32 nValue, char const*const pis, sal_Int32 nMin, sal_Int32 nMax) +{ + OUString const is(OUString::createFromAscii(pis)); + sal_Int32 nTemp; + bool 
bSuccess(Converter::convertNumber(nTemp, is, nMin, nMax)); + SAL_INFO("sax.cppunit","" << nTemp); + CPPUNIT_ASSERT(bSuccess); + CPPUNIT_ASSERT_EQUAL(nValue, nTemp); +} + +void ConverterTest::testNumber() +{ + doTestStringToNumber(30, "30", 1, 40); + doTestStringToNumber(1, "-5", 1, 300); + doTestStringToNumber(-30, "7", -100, -30); + doTestStringToNumber(0, "-0", 0, 1); + doTestStringToNumber(0, "666", -0, 0); +} + +CPPUNIT_TEST_SUITE_REGISTRATION(ConverterTest); + +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/qa/cppunit/xmlimport.cxx b/sax/qa/cppunit/xmlimport.cxx new file mode 100644 index 0000000000..1eb872d505 --- /dev/null +++ b/sax/qa/cppunit/xmlimport.cxx @@ -0,0 +1,454 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> +#include <sal/types.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> +#include <test/bootstrapfixture.hxx> +#include <cppuhelper/implbase.hxx> +#include <com/sun/star/beans/Pair.hpp> +#include <com/sun/star/xml/sax/SAXException.hpp> +#include <com/sun/star/xml/sax/XDocumentHandler.hpp> +#include <com/sun/star/xml/sax/XFastTokenHandler.hpp> +#include <comphelper/processfactory.hxx> +#include <com/sun/star/xml/sax/Parser.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/XLocator.hpp> +#include <com/sun/star/xml/sax/FastToken.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <osl/file.hxx> +#include <unotools/ucbstreamhelper.hxx> +#include <unotools/streamwrap.hxx> +#include <sax/fastattribs.hxx> +#include <stack> +#include <string_view> +#include <deque> +#include <rtl/ref.hxx> + + +namespace { + +using namespace css; +using namespace uno; +using namespace io; +using namespace xml::sax; +using namespace ::osl; +using namespace sax_fastparser; + +Reference< XInputStream > createStreamFromFile ( + const OUString & filePath) +{ + Reference< XInputStream > xInputStream; + OUString aInStr; + FileBase::getFileURLFromSystemPath(filePath, aInStr); + std::unique_ptr<SvStream> pStream = utl::UcbStreamHelper::CreateStream(aInStr, StreamMode::READ); + if(pStream == nullptr) + CPPUNIT_ASSERT(false); + Reference< XStream > xStream(new utl::OStreamWrapper(std::move(pStream))); + xInputStream.set(xStream, UNO_QUERY); + return xInputStream; +} + +class TestDocumentHandler : public cppu::WeakImplHelper< XDocumentHandler > +{ +private: + OUString m_aStr; + std::deque< std::pair<OUString,OUString> > m_aNamespaceStack; + std::stack<sal_uInt16> m_aCountStack; + + OUString canonicalform(const OUString &sName, const OUString &sValue, bool isElement); + OUString getNamespace(std::u16string_view sName); + +public: + TestDocumentHandler() {} + const OUString & getString() 
const { return m_aStr; } + + // XDocumentHandler + virtual void SAL_CALL startDocument() override; + virtual void SAL_CALL endDocument() override; + virtual void SAL_CALL startElement( const OUString& aName, const Reference< XAttributeList >& xAttribs ) override; + virtual void SAL_CALL endElement( const OUString& aName ) override; + virtual void SAL_CALL characters( const OUString& aChars ) override; + virtual void SAL_CALL ignorableWhitespace( const OUString& aWhitespaces ) override; + virtual void SAL_CALL processingInstruction( const OUString& aTarget, const OUString& aData ) override; + virtual void SAL_CALL setDocumentLocator( const Reference< XLocator >& xLocator ) override; +}; + +OUString TestDocumentHandler::canonicalform(const OUString &sName, const OUString &sValue, bool isElement) +{ + sal_Int16 nIndex = sName.indexOf(":"); + if ( !isElement && sName.match( "xmlns" ) ) + { + m_aCountStack.top() += 1; + if ( nIndex < 0 ) + m_aNamespaceStack.emplace_back( OUString( "default" ), sValue ); + else + m_aNamespaceStack.emplace_back( sName.copy( nIndex + 1 ), sValue ); + } + else + { + if ( nIndex >= 0 ) + { + OUString sNamespace = getNamespace( sName.subView( 0, nIndex ) ); + return sNamespace + sName.subView(nIndex); + } + else + { + OUString sDefaultns = getNamespace( u"default" ); + if ( !isElement || sDefaultns.isEmpty() ) + return sName; + else + return sDefaultns + ":" + sName; + } + } + return OUString(); +} + +OUString TestDocumentHandler::getNamespace(std::u16string_view sName) +{ + for (sal_Int16 i = m_aNamespaceStack.size() - 1; i>=0; i--) + { + std::pair<OUString, OUString> aPair = m_aNamespaceStack.at(i); + if (aPair.first == sName) + return aPair.second; + } + return OUString(); +} + +void SAL_CALL TestDocumentHandler::startDocument() +{ + m_aStr.clear(); + m_aNamespaceStack.clear(); + m_aNamespaceStack.emplace_back( std::make_pair( OUString( "default" ), OUString() ) ); + m_aCountStack = std::stack<sal_uInt16>(); + m_aCountStack.emplace(0); +} 
+ + +void SAL_CALL TestDocumentHandler::endDocument() +{ +} + +void SAL_CALL TestDocumentHandler::startElement( const OUString& aName, const Reference< XAttributeList >& xAttribs ) +{ + OUString sAttributes; + m_aCountStack.push(0); + sal_uInt16 len = xAttribs->getLength(); + for (sal_uInt16 i=0; i<len; i++) + { + OUString sAttrValue = xAttribs->getValueByIndex(i); + OUString sAttrName = canonicalform(xAttribs->getNameByIndex(i), sAttrValue, false); + if (!sAttrName.isEmpty()) + sAttributes += sAttrName + sAttrValue; + } + m_aStr += canonicalform(aName, "", true) + sAttributes; +} + + +void SAL_CALL TestDocumentHandler::endElement( const OUString& aName ) +{ + m_aStr += canonicalform(aName, "", true); + sal_uInt16 nPopQty = m_aCountStack.top(); + for (sal_uInt16 i=0; i<nPopQty; i++) + m_aNamespaceStack.pop_back(); + m_aCountStack.pop(); +} + + +void SAL_CALL TestDocumentHandler::characters( const OUString& aChars ) +{ + m_aStr += aChars; +} + + +void SAL_CALL TestDocumentHandler::ignorableWhitespace( const OUString& aWhitespaces ) +{ + m_aStr += aWhitespaces; +} + + +void SAL_CALL TestDocumentHandler::processingInstruction( const OUString& aTarget, const OUString& aData ) +{ + m_aStr += aTarget + aData; +} + + +void SAL_CALL TestDocumentHandler::setDocumentLocator( const Reference< XLocator >& /*xLocator*/ ) +{ +} + +class NSDocumentHandler : public cppu::WeakImplHelper< XDocumentHandler > +{ +public: + NSDocumentHandler() {} + + // XDocumentHandler + virtual void SAL_CALL startDocument() override {} + virtual void SAL_CALL endDocument() override {} + virtual void SAL_CALL startElement( const OUString& aName, const Reference< XAttributeList >& xAttribs ) override; + virtual void SAL_CALL endElement( const OUString& /* aName */ ) override {} + virtual void SAL_CALL characters( const OUString& /* aChars */ ) override {} + virtual void SAL_CALL ignorableWhitespace( const OUString& /* aWhitespaces */ ) override {} + virtual void SAL_CALL processingInstruction( const 
OUString& /* aTarget */, const OUString& /* aData */ ) override {} + virtual void SAL_CALL setDocumentLocator( const Reference< XLocator >& /* xLocator */ ) override {} +}; + +OUString getNamespaceValue( std::u16string_view rNamespacePrefix ) +{ + OUString aNamespaceURI; + if (rNamespacePrefix == u"office") + aNamespaceURI = "urn:oasis:names:tc:opendocument:xmlns:office:1.0"; + else if (rNamespacePrefix == u"text") + aNamespaceURI = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"; + else if (rNamespacePrefix == u"note") + aNamespaceURI = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"; + return aNamespaceURI; +} + +OUString resolveNamespace( const OUString& aName ) +{ + int index; + if (( index = aName.indexOf( ':' )) > 0 ) + { + if ( aName.getLength() > index + 1 ) + { + OUString aAttributeName = getNamespaceValue( aName.subView( 0, index ) ) + + ":" + aName.subView( index + 1 ); + return aAttributeName; + } + } + return aName; +} + +void SAL_CALL NSDocumentHandler::startElement( const OUString& aName, const Reference< XAttributeList >&/* xAttribs */ ) +{ + if (! (aName == "office:document" || aName == "office:body" || aName == "office:text" || + aName == "text:p" || aName == "note:p") ) + CPPUNIT_ASSERT(false); + + OUString sResolvedName = resolveNamespace(aName); + if (! 
( sResolvedName == "urn:oasis:names:tc:opendocument:xmlns:office:1.0:document" || + sResolvedName == "urn:oasis:names:tc:opendocument:xmlns:office:1.0:body" || + sResolvedName == "urn:oasis:names:tc:opendocument:xmlns:office:1.0:text" || + sResolvedName == "urn:oasis:names:tc:opendocument:xmlns:text:1.0:p") ) + CPPUNIT_ASSERT(false); +} + +class DummyTokenHandler : public sax_fastparser::FastTokenHandlerBase +{ +public: + const static std::string_view tokens[]; + const static std::u16string_view namespaceURIs[]; + const static std::string_view namespacePrefixes[]; + + // XFastTokenHandler + virtual Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) override; + virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier ) override; + //FastTokenHandlerBase + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override; +}; + +const std::string_view DummyTokenHandler::tokens[] = { + "Signature", "CanonicalizationMethod", + "Algorithm", "Type", + "DigestMethod", "Reference", + "document", "spacing", + "Player", "Height" }; + +const std::u16string_view DummyTokenHandler::namespaceURIs[] = { + u"http://www.w3.org/2000/09/xmldsig#", + u"http://schemas.openxmlformats.org/wordprocessingml/2006/main/", + u"xyzsports.com/players/football/" }; + +const std::string_view DummyTokenHandler::namespacePrefixes[] = { + "", + "w", + "Player" }; + +Sequence< sal_Int8 > DummyTokenHandler::getUTF8Identifier( sal_Int32 nToken ) +{ + std::string_view aUtf8Token; + if ( ( nToken & 0xffff0000 ) != 0 ) //namespace + { + sal_uInt32 nNamespaceToken = ( nToken >> 16 ) - 1; + if ( nNamespaceToken < std::size(namespacePrefixes) ) + aUtf8Token = namespacePrefixes[ nNamespaceToken ]; + } + else //element or attribute + { + size_t nElementToken = nToken & 0xffff; + if ( nElementToken < std::size(tokens) ) + aUtf8Token = tokens[ nElementToken ]; + } + Sequence< sal_Int8 > aSeq( reinterpret_cast< const sal_Int8* >( + 
aUtf8Token.data() ), aUtf8Token.size() ); + return aSeq; +} + +sal_Int32 DummyTokenHandler::getTokenFromUTF8( const uno::Sequence< sal_Int8 >& rIdentifier ) +{ + return getTokenDirect( reinterpret_cast< const char* >( + rIdentifier.getConstArray() ), rIdentifier.getLength() ); +} + +sal_Int32 DummyTokenHandler::getTokenDirect( const char* pToken, sal_Int32 nLength ) const +{ + std::string_view sToken( pToken, nLength ); + for( size_t i = 0; i < std::size(tokens); i++ ) + { + if ( tokens[i] == sToken ) + return static_cast<sal_Int32>(i); + } + return FastToken::DONTKNOW; +} + + +class XMLImportTest : public test::BootstrapFixture +{ +private: + OUString m_sDirPath; + rtl::Reference< TestDocumentHandler > m_xDocumentHandler; + Reference< XParser > m_xParser; + Reference< XParser > m_xLegacyFastParser; + +public: + virtual void setUp() override; + + XMLImportTest() : BootstrapFixture(true, false) {} + void parse(); + void testMissingNamespaceDeclaration(); + void testIllegalNamespaceUse(); + + CPPUNIT_TEST_SUITE( XMLImportTest ); + CPPUNIT_TEST( parse ); + CPPUNIT_TEST( testMissingNamespaceDeclaration ); + CPPUNIT_TEST( testIllegalNamespaceUse ); + CPPUNIT_TEST_SUITE_END(); +}; + +void XMLImportTest::setUp() +{ + test::BootstrapFixture::setUp(); + Reference< XComponentContext > xContext = comphelper::getProcessComponentContext(); + m_xDocumentHandler.set( new TestDocumentHandler() ); + m_xParser = Parser::create( xContext ); + m_xParser->setDocumentHandler( m_xDocumentHandler ); + m_xLegacyFastParser.set( xContext->getServiceManager()->createInstanceWithContext + ( "com.sun.star.xml.sax.LegacyFastParser", xContext ), UNO_QUERY ); + m_xLegacyFastParser->setDocumentHandler( m_xDocumentHandler ); + + Reference< XFastTokenHandler > xTokenHandler; + xTokenHandler.set( new DummyTokenHandler ); + uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser, + uno::UNO_QUERY_THROW); + xInit->initialize({ uno::Any(xTokenHandler) }); + + sal_Int32 nNamespaceCount = 
SAL_N_ELEMENTS(DummyTokenHandler::namespaceURIs); + uno::Sequence<uno::Any> namespaceArgs( nNamespaceCount + 1 ); + auto p_namespaceArgs = namespaceArgs.getArray(); + p_namespaceArgs[0] <<= OUString( "registerNamespaces" ); + for (sal_Int32 i = 1; i <= nNamespaceCount; i++ ) + { + css::beans::Pair<OUString, sal_Int32> rPair( OUString(DummyTokenHandler::namespaceURIs[i - 1]), i << 16 ); + p_namespaceArgs[i] <<= rPair; + } + xInit->initialize( namespaceArgs ); + + m_sDirPath = m_directories.getPathFromSrc( u"/sax/qa/data/" ); +} + +void XMLImportTest::parse() +{ + OUString fileNames[] = {"simple.xml", "defaultns.xml", "inlinens.xml", + "multiplens.xml", "multiplepfx.xml", + "nstoattributes.xml", "nestedns.xml", "testthreading.xml"}; + + for (size_t i = 0; i < std::size( fileNames ); i++) + { + InputSource source; + source.sSystemId = "internal"; + + source.aInputStream = createStreamFromFile( m_sDirPath + fileNames[i] ); + m_xParser->parseStream(source); + const OUString rParserStr = m_xDocumentHandler->getString(); + + source.aInputStream = createStreamFromFile( m_sDirPath + fileNames[i] ); + m_xLegacyFastParser->parseStream(source); + const OUString rLegacyFastParserStr = m_xDocumentHandler->getString(); + + CPPUNIT_ASSERT_EQUAL( rParserStr, rLegacyFastParserStr ); + // OString o = OUStringToOString( Str, RTL_TEXTENCODING_ASCII_US ); + // CPPUNIT_ASSERT_MESSAGE( string(o.pData->buffer), false ); + } +} + +void XMLImportTest::testMissingNamespaceDeclaration() +{ + OUString fileNames[] = { "manifestwithnsdecl.xml", "manifestwithoutnsdecl.xml" }; + + uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser, + uno::UNO_QUERY_THROW); + xInit->initialize({ uno::Any(OUString("IgnoreMissingNSDecl")) }); + + for (sal_uInt16 i = 0; i < std::size( fileNames ); i++) + { + try + { + InputSource source; + source.sSystemId = "internal"; + + source.aInputStream = createStreamFromFile( m_sDirPath + fileNames[i] ); + m_xParser->parseStream(source); + const OUString 
rParserStr = m_xDocumentHandler->getString(); + + source.aInputStream = createStreamFromFile( m_sDirPath + fileNames[i] ); + m_xLegacyFastParser->parseStream(source); + const OUString rLegacyFastParserStr = m_xDocumentHandler->getString(); + + CPPUNIT_ASSERT_EQUAL( rParserStr, rLegacyFastParserStr ); + } + catch( const SAXException & ) + { + } + } +} + +void XMLImportTest::testIllegalNamespaceUse() +{ + rtl::Reference< NSDocumentHandler > m_xNSDocumentHandler; + m_xNSDocumentHandler.set( new NSDocumentHandler() ); + m_xParser->setDocumentHandler( m_xNSDocumentHandler ); + InputSource source; + source.sSystemId = "internal"; + + source.aInputStream = createStreamFromFile( m_sDirPath + "multiplepfx.xml" ); + m_xParser->parseStream(source); + + m_xLegacyFastParser->setDocumentHandler( m_xNSDocumentHandler ); + source.aInputStream = createStreamFromFile( m_sDirPath + "multiplepfx.xml" ); + m_xLegacyFastParser->parseStream(source); +} + +CPPUNIT_TEST_SUITE_REGISTRATION( XMLImportTest ); +} //namespace + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/qa/data/defaultns.xml b/sax/qa/data/defaultns.xml new file mode 100644 index 0000000000..2e7819b164 --- /dev/null +++ b/sax/qa/data/defaultns.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" ?> +<Books xmlns="http://xyzbooks.com/books/"> + <Book> + <Title>War and Peace</Title> + <Author>Leo Tolstoy</Author> + </Book> + <Book> + <Title>To Kill a Mockingbird</Title> + <Author>Harper Lee</Author> + </Book> +</Books> diff --git a/sax/qa/data/inlinens.xml b/sax/qa/data/inlinens.xml new file mode 100644 index 0000000000..02c4214085 --- /dev/null +++ b/sax/qa/data/inlinens.xml @@ -0,0 +1,12 @@ +<?xml version="1.0" ?> +<Students xmlns="http://xyzuniversity.org/student/"> + <Student xmlns:ug="http://xyzuniversity.org/student/ug/"> + <Name>ABC</Name> + <ug:Branch>Computer Science</ug:Branch> + <ug:Grade>7.9</ug:Grade> + </Student> + <Student 
xmlns:pg="http://xyzuniversity.org/student/pg/"> + <Name>PQR</Name> + <pg:Field>Artificial Intelligence</pg:Field> + </Student> +</Students>
\ No newline at end of file diff --git a/sax/qa/data/manifestwithnsdecl.xml b/sax/qa/data/manifestwithnsdecl.xml new file mode 100644 index 0000000000..ac61c3e206 --- /dev/null +++ b/sax/qa/data/manifestwithnsdecl.xml @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="UTF-8"?> +<manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" manifest:version="1.2"> + <manifest:file-entry manifest:full-path="/" manifest:version="1.2" manifest:media-type="application/vnd.oasis.opendocument.text"/> + <manifest:file-entry manifest:full-path="Thumbnails/thumbnail.png" manifest:media-type="image/png"/> + <manifest:file-entry manifest:full-path="content.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="styles.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="meta.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="settings.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="Configurations2/accelerator/current.xml" manifest:media-type=""/> + <manifest:file-entry manifest:full-path="Configurations2/" manifest:media-type="application/vnd.sun.xml.ui.configuration"/> + <manifest:file-entry manifest:full-path="manifest.rdf" manifest:media-type="application/rdf+xml"/> +</manifest:manifest> diff --git a/sax/qa/data/manifestwithoutnsdecl.xml b/sax/qa/data/manifestwithoutnsdecl.xml new file mode 100644 index 0000000000..1c8f535963 --- /dev/null +++ b/sax/qa/data/manifestwithoutnsdecl.xml @@ -0,0 +1,12 @@ +<?xml version="1.0" encoding="UTF-8"?> +<manifest:manifest> + <manifest:file-entry manifest:full-path="/" manifest:version="1.2" manifest:media-type="application/vnd.oasis.opendocument.text"/> + <manifest:file-entry manifest:full-path="Thumbnails/thumbnail.png" manifest:media-type="image/png"/> + <manifest:file-entry manifest:full-path="content.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="styles.xml" 
manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="meta.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="settings.xml" manifest:media-type="text/xml"/> + <manifest:file-entry manifest:full-path="Configurations2/accelerator/current.xml" manifest:media-type=""/> + <manifest:file-entry manifest:full-path="Configurations2/" manifest:media-type="application/vnd.sun.xml.ui.configuration"/> + <manifest:file-entry manifest:full-path="manifest.rdf" manifest:media-type="application/rdf+xml"/> +</manifest:manifest> diff --git a/sax/qa/data/multiplens.xml b/sax/qa/data/multiplens.xml new file mode 100644 index 0000000000..e1dc4ce01e --- /dev/null +++ b/sax/qa/data/multiplens.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" ?> +<Athletes> + <Player xmlns:Player="xyzsports.com/players/football/"> + <Player:Name>Lionel Messi</Player:Name> + <Player:Height>1.70 m</Player:Height> + <Player:Position>Forward</Player:Position> + </Player> + <Player xmlns:Player="xyzsports.com/players/Cricket/"> + <Player:Name>Sachin Ramesh Tendulkar</Player:Name> + <Player:Height>165 cm</Player:Height> + <Player:Style>Right handed</Player:Style> + </Player> +</Athletes>
\ No newline at end of file diff --git a/sax/qa/data/multiplepfx.xml b/sax/qa/data/multiplepfx.xml new file mode 100644 index 0000000000..b7686cad5f --- /dev/null +++ b/sax/qa/data/multiplepfx.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" ?> +<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"> + <office:body> + <office:text> + <text:p xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" text:style-name="Title">Test Document </text:p> + <note:p xmlns:note="urn:oasis:names:tc:opendocument:xmlns:text:1.0" note:style-name="Heading">For testing purposes only</note:p> + </office:text> + </office:body> +</office:document> diff --git a/sax/qa/data/nestedns.xml b/sax/qa/data/nestedns.xml new file mode 100644 index 0000000000..566332b40a --- /dev/null +++ b/sax/qa/data/nestedns.xml @@ -0,0 +1,32 @@ +<?xml version="1.0" ?> +<?pi-target pi-data?> +<Elements> + <Book xmlns:lib="http://www.library.com/"> + <lib:Title>Sherlock Holmes - I</lib:Title> + <lib:Author>Arthur Conan Doyle</lib:Author> + <purchase xmlns:lib="http://www.otherlibrary.com/"> + <lib:Title>Sherlock Holmes - II</lib:Title> + <lib:Author>Arthur Conan Doyle</lib:Author> + </purchase> + <lib:Title>Sherlock Holmes - III</lib:Title> + <lib:Author>Arthur Conan Doyle</lib:Author> + </Book> + <Electronics xmlns="http://doesntexist.com/electronics/"> + <item> + <Name>Apple iPhone 6s</Name> + <?pi-target-only?> + <Price>$324</Price> + </item> + <item xmlns="http://doesntexist.com/dailyuse/"> + <Name>Philips Aqua Touch Shaver</Name> + <item xmlns="http://doesntexist.com/dailyuse/model/"> + <Model>AT890</Model> + <Price>$74</Price> + </item> + </item> + <item> + <Name>Macbook Pro</Name> + <Price>$500</Price> + </item> + </Electronics> +</Elements>
\ No newline at end of file diff --git a/sax/qa/data/nstoattributes.xml b/sax/qa/data/nstoattributes.xml new file mode 100644 index 0000000000..dee2edfdcb --- /dev/null +++ b/sax/qa/data/nstoattributes.xml @@ -0,0 +1,17 @@ +<?xml version="1.0" ?> +<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main/"> + <w:body> + <w:p w:rsidR="009A1A3D" w:rsidRPr="00BA52A7" w:rsidRDefault="00225691" w:rsidP="00BA52A7"> + <w:pPr xyz="abc"> + <w:spacing w:line="276" w:lineRule="auto"/> + <w:rPr> + <w:rFonts w:asciiTheme="minorHAnsi" w:hAnsiTheme="minorHAnsi"/> + <w:sz w:val="24" val="27" /> + <w:szCs w:val="24"/> + </w:rPr> + </w:pPr> + <w:bookmarkStart w:id="0" w:name="page1"/> + <w:bookmarkEnd w:id="0"/> + </w:p> + </w:body> +</w:document>
\ No newline at end of file diff --git a/sax/qa/data/simple.xml b/sax/qa/data/simple.xml new file mode 100644 index 0000000000..67c4fbde57 --- /dev/null +++ b/sax/qa/data/simple.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" ?> +<TVActors> + <Actor> + <Name>Bryan Cranston</Name> + <Show>Breaking Bad</Show> + </Actor> + <Actor> + <Name>Peter Dinklage</Name> + <Show>Game of Thrones</Show> + </Actor> +</TVActors>
\ No newline at end of file diff --git a/sax/qa/data/testthreading.xml b/sax/qa/data/testthreading.xml new file mode 100644 index 0000000000..0d05fd9959 --- /dev/null +++ b/sax/qa/data/testthreading.xml @@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="UTF-8"?><Signature xmlns="http://www.w3.org/2000/09/xmldsig#" Id="idPackageSignature"><SignedInfo><CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/><SignatureMethod Algorithm="http://www.w3.org/2001/04/xmldsig-more#rsa-sha256"/><Reference Type="http://www.w3.org/2000/09/xmldsig#Object" URI="#idPackageObject"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>NOjlH6v6g2ojDvQoi4M5B8Bght0y3ES4fjxlRk2xtVE=</DigestValue></Reference><Reference Type="http://www.w3.org/2000/09/xmldsig#Object" URI="#idOfficeObject"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>mq1H1GLrTZSuUUTqX5HpjytFwl8nJFggNsXJUgQZT0U=</DigestValue></Reference><Reference Type="http://uri.etsi.org/01903#SignedProperties" URI="#idSignedProperties"><Transforms><Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>6jeT5n6jeqOspz6K6BIgitwfLZf4s1/uS9dPdOQRF8s=</DigestValue></Reference></SignedInfo><SignatureValue>UZwOWZbOm9ysRa+eYwoksUW8d+jBFPOkqOtfTvT8waHWDe3siDc4N79W6jPCYMIHMtPQjCb4qMDe +dhCYvg6TTWFWCvU/br+A1Qo0xovWHD3DwB29qk7NDBfbnEIPxbOe2D70cZa86Zl2MgW5YqQlgRFH +g6+XxwBUp+ZYx4knFWEg8zgbe3JnV7zeI/RG/1iq9TYH1GUBloF10df4qaulrp2AUkdSvnnUcxRP +ZfbS+14YxUHkW0UTyV+6ZeXqtTnXS0F/LG5JH2/xkN+mgwLB6TPfxtQD6vcj+Tdnf0hHlnuOmvBS 
+L7Pn+zwS0ueMOTxIozcaYPsmJc9fVpEU2I59PA==</SignatureValue><KeyInfo><X509Data><X509Certificate>MIIE7jCCAtagAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwVzELMAkGA1UEBhMCVUsxEDAOBgNVBAgMB0VuZ2xhbmQxEjAQBgNVBAoMCVRTQ1AgVGVzdDEiMCAGA1UEAwwZVFNDUCBJbnRlcm1lZGlhdGUgUm9vdCBDQTAeFw0xNTEyMTgwNzU4MTlaFw0xNjEyMjcwNzU4MTlaMFUxCzAJBgNVBAYTAlVLMRAwDgYDVQQIDAdFbmdsYW5kMRIwEAYDVQQKDAlUU0NQIFRlc3QxIDAeBgNVBAMMF1RTQ1AgVGVzdCBleGFtcGxlIEFsaWNlMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA3m2YNdX+nc1LkhlrNrcIPI3yCWnv0/0k9zDKpKiwjMH4vjWM46M6ptAiupxVpAMW5ojnhEyxaNHvZNsCwddYE6778hut2SJvz0szSBuHUuedcALI2EhVwdM0yLqfGo6WGeOIBDId49TemdNCMhk2zOpb1BqYhKls0LfdbxT/an3JaDmmLhPjvgYMJNYVX86L199OQFLJ1zLqQ0YirkKqXL9cSPmyYBKjgnqQ4Z5YfPL63EP0TsEfa5oQmy/0gS5FB2Wz9CqIptB130v0GR4XObTpOkhPFfC5RDBFTMZoi4NCK10wn2NCbr7qZ3aMrOlfeKbsNIifwu0KYFHXyxL5AwIDAQABo4HFMIHCMAkGA1UdEwQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMDMGCWCGSAGG+EIBDQQmFiRPcGVuU1NMIEdlbmVyYXRlZCBDbGllbnQgQ2VydGlmaWNhdGUwHQYDVR0OBBYEFCL6DzsuAbni8475Z+HkX5tv8iiWMB8GA1UdIwQYMBaAFMuejS1rWjUf3x1+2QbPSVpuXFl+MA4GA1UdDwEB/wQEAwIF4DAdBgNVHSUEFjAUBggrBgEFBQcDAgYIKwYBBQUHAwQwDQYJKoZIhvcNAQELBQADggIBAFs0DeCDjttHQ0UHsYcnhfBCWRdOFdIr3F/IEbN2BL/grScGXoXRaYMIQJv/s5dKgZIuH7xMCVKazoftPVqU4bOEduAv0IJ6hQF/wEMBueA0UjvQQVYZgsOALi7TD3gYpFqYcH2Wfx5/5Ln6dllL8UsHoP+6gSLaYwjJd7FQ+IlNTzR65dRMLoJhoKqqyuM6cf/PM8sbK2NH2r8toypjfPixvD/w3wP7xn4oo/IGXcRK4DTHBF/rSMqeR6ePwXm5tVHrQBfnxN3dsGsXkQgqzBvvbPY0raraO4CPR7mZp4GVFHOsUNh5TI1SlfxWZ49HU3F5jWeiI9jPuw1RmuAyZdFEt403Wi67v6revXe1By6UqIZjq3b2pJGBKZH+60P1cJScawzrN8pi1qQFV8JiiJM6/MSciqplTT5F7SG0XZx1CjnBz5rMdYNhI9NNtF3oy9Xy9RvgYehFaC43ZlBBUMDmZFj5a78hOOkkq1UnrHUdeXyWhiEFzv5d8My2i0kWGq8r0HuC25BmOa17lHVxQ2o7Rdu9jDFP9oNizC7kQfA5QVRTfBFcWH7jml69RmVgfM+X+wdQgen9hJAILhBzmDfeteJ5ZEaoEYtw3isOGkpSyg7odjgYq7I+bOiN1toDg07vzfIkvF9KxlkDeRLXbmcFIvQsqFeF6cUwlZQYLOHA</X509Certificate></X509Data></KeyInfo><Object Id="idPackageObject"><Manifest><Reference URI="/_rels/.rels?ContentType=application/vnd.openxmlformats-package.relationships+xml"><Transforms><Transform 
Algorithm="http://schemas.openxmlformats.org/package/2006/RelationshipTransform"><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId1"/></Transform><Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>Mq3mDDWudLiaQFa1psBgLG+/en7p7r8re0MtlxnuiUI=</DigestValue></Reference><Reference URI="/word/_rels/document.xml.rels?ContentType=application/vnd.openxmlformats-package.relationships+xml"><Transforms><Transform Algorithm="http://schemas.openxmlformats.org/package/2006/RelationshipTransform"><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId2"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId1"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId5"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId4"/><mdssi:RelationshipReference xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature" SourceId="rId3"/></Transform><Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/></Transforms><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>fLpr0+BDSSPPgsv2eBghgw8hu/vi7IslCQuCSKGf2X4=</DigestValue></Reference><Reference URI="/word/document.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>7Hrf+Oz9oMcqn7nOKgy1P39L313r8SO/pT3wQpVwq5k=</DigestValue></Reference><Reference URI="/word/fontTable.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"><DigestMethod 
Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>VZENondrzBVuF5GsYtsdSYgGNQS11L4XQ7vYiTn5PBE=</DigestValue></Reference><Reference URI="/word/settings.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>nYb+Wnf/ttBtq0PSR33tt+yKzvx6RdYmtEhD3XnBwaI=</DigestValue></Reference><Reference URI="/word/styles.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>4W+Exf0d9q8aHyBJ94YvTqRgdlhjR2r7F+eO3udy01s=</DigestValue></Reference><Reference URI="/word/theme/theme1.xml?ContentType=application/vnd.openxmlformats-officedocument.theme+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>L8HrIbYZUORASW6Jbsljhmie2tLKO9ld8ME0syr+ZDE=</DigestValue></Reference><Reference URI="/word/webSettings.xml?ContentType=application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>8JastnM5t30OLdmv2PyPNOe1YxAXc/Qz0O1UCFnWyxM=</DigestValue></Reference></Manifest><SignatureProperties><SignatureProperty Id="idSignatureTime" Target="#idPackageSignature"><mdssi:SignatureTime xmlns:mdssi="http://schemas.openxmlformats.org/package/2006/digital-signature"><mdssi:Format>YYYY-MM-DDThh:mm:ssTZD</mdssi:Format><mdssi:Value>2016-01-11T15:32:02Z</mdssi:Value></mdssi:SignatureTime></SignatureProperty></SignatureProperties></Object><Object Id="idOfficeObject"><SignatureProperties><SignatureProperty Id="idOfficeV1Details" Target="#idPackageSignature"><SignatureInfoV1 
xmlns="http://schemas.microsoft.com/office/2006/digsig"><SetupID></SetupID><SignatureText></SignatureText><SignatureImage/><SignatureComments>purpose</SignatureComments><WindowsVersion>6.1</WindowsVersion><OfficeVersion>16.0</OfficeVersion><ApplicationVersion>16.0</ApplicationVersion><Monitors>1</Monitors><HorizontalResolution>1280</HorizontalResolution><VerticalResolution>800</VerticalResolution><ColorDepth>32</ColorDepth><SignatureProviderId>{00000000-0000-0000-0000-000000000000}</SignatureProviderId><SignatureProviderUrl></SignatureProviderUrl><SignatureProviderDetails>9</SignatureProviderDetails><SignatureType>1</SignatureType></SignatureInfoV1></SignatureProperty></SignatureProperties></Object><Object><xd:QualifyingProperties xmlns:xd="http://uri.etsi.org/01903/v1.3.2#" Target="#idPackageSignature"><xd:SignedProperties Id="idSignedProperties"><xd:SignedSignatureProperties><xd:SigningTime>2016-01-11T15:32:02Z</xd:SigningTime><xd:SigningCertificate><xd:Cert><xd:CertDigest><DigestMethod Algorithm="http://www.w3.org/2001/04/xmlenc#sha256"/><DigestValue>VRk3WQgpNIAnRxA9txzm7L2v1Iuwu2WrHpwa5WNTKV0=</DigestValue></xd:CertDigest><xd:IssuerSerial><X509IssuerName>CN=TSCP Intermediate Root CA, O=TSCP Test, S=England, 
C=UK</X509IssuerName><X509SerialNumber>4096</X509SerialNumber></xd:IssuerSerial></xd:Cert></xd:SigningCertificate><xd:SignaturePolicyIdentifier><xd:SignaturePolicyImplied/></xd:SignaturePolicyIdentifier></xd:SignedSignatureProperties></xd:SignedProperties><xd:UnsignedProperties><xd:UnsignedSignatureProperties><xd:CertificateValues><xd:EncapsulatedX509Certificate>MIIFiDCCA3CgAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwTzELMAkGA1UEBhMCVUsxEDAOBgNVBAgMB0VuZ2xhbmQxEjAQBgNVBAoMCVRTQ1AgVGVzdDEaMBgGA1UEAwwRVFNDUCBUZXN0IFJvb3QgQ0EwHhcNMTUxMjE4MDc1ODE5WhcNMjUxMjE1MDc1ODE5WjBXMQswCQYDVQQGEwJVSzEQMA4GA1UECAwHRW5nbGFuZDESMBAGA1UECgwJVFNDUCBUZXN0MSIwIAYDVQQDDBlUU0NQIEludGVybWVkaWF0ZSBSb290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAz7cet97WutWedG8L4cU7vjXkkzc8PkXnuvoF6ltIbDbnt2jt+0JH8xjGpfMVF+si0RPMwNQDmrTSj6SCpBcFV11hgyLXeovO17cSfbUYhl53ghyY0w/J3We19JJNeXtXlgZKrzre1gaXPxUG6Q4/TGo/gh9DmqEWjGkL3E6FboNfrIXHvOyhYT0kuTJkp8uqbBhhb/jO/FUV2DvTdoX04YZlFsheP00FHYz5ge6e3tG/WSeabTCUOsqq4+1jFOGNapvBrMkYC4QJwOc4bjIP8UmMlHqJae2mQVTVK3hiXtdMuS/H7rM/ZzCzcNQ2kyXUr843oul2FZ9UK3HI0TVrBFi0mpcWtvo/PMLNNXHJ/2MVZcHVgQ5+uBk0/Zq1vrqmxEev5t9lkKIzXRdztKP/EuBEjSvtcx+KZiHei+CvN+5nn5D0A6BfMYiQKmr2lVkCDs5v++iLINGbf54UoeM/dv2VEaC9bcdPeR8JHcvzS1f/cjHAbmnbMUumX985jha9SJavXkgWU6LgL5+txGD2X4Sb/+cf4ver1Zd1QGxqZ80wwK/qRUFGHWehcjCKAzeqnMRTJLUeNH5TcnvYv5fRq9wDMLzcjXXHaKEE+n3xt4fyCdEn5AFWeCPenqW3iZpEJTOHXm65Gj33/OTqoMLj8Mas3shoES1tL7LqQu/ZhgMCAwEAAaNmMGQwHQYDVR0OBBYEFMuejS1rWjUf3x1+2QbPSVpuXFl+MB8GA1UdIwQYMBaAFBtcCsdShJelvNJRLLHhVOy17wRXMBIGA1UdEwEB/wQIMAYBAf8CAQAwDgYDVR0PAQH/BAQDAgGGMA0GCSqGSIb3DQEBCwUAA4ICAQAzYDupn8D+Vh5NYcyiufM86F7GqmQ8Rows5R9o1c/5iax4G95Ley5EjUXo7/Hq2JSmtGVwkmlCBKduxsWmkbcBCJwr5AFX4TY8QFgTwKm4+IDlDA+Qs5m5bFKwbpJ8+oakz2L4j032hh6pRlezYh0P6ciVrTUNIFdR4GLusV8ronHa2AIJy9OIihI8zwDvT5rlPtDVs/wPiSq5+qcM/wnKo8X1JYU/tM1w6xvge0WNIf2yzedl2jZbbQm6wmCioCMZ7nUyyywC7WYFgCgOwOKfEa7pWhwCXpWt4MetNzXSpumurhrmn7B8y6NNarHNMhB9xv4Do4VwezMRCydnOGkl/B2fMMSoS3hxItJWDzEMkD51M6uk3yGnrQnApfEcGhEUTE8WvR8Il+Od4qFX7r50A5LguHXc4EshDJU4IaZ
EcYvOu91Xh2vsIZU72CXUELqMwJB7NjAkVwv8dQzHbKnK4E6y0zO1dHsjvwBfJl56PNRYx9fxJoBqdK4VrVVZAQ+wQ7wrvbjF2p5EnciHQS1NmLJjNeVvCY1dprYjp9mDxBFNNm4DhXYHF1TXpDqS7nC5cdJlTtdg4LxI4isMY/R1plDq2oxwaxhd1+5CxtMTsuv1A2qFvaRLUNS8SQE6PjWz1NxdczS5aBrXBfBFhxkEP3AtEbAyv0HiHidUAcq4pw==</xd:EncapsulatedX509Certificate><xd:EncapsulatedX509Certificate>MIIFhDCCA2ygAwIBAgIJAIc74NSdmtB4MA0GCSqGSIb3DQEBCwUAME8xCzAJBgNVBAYTAlVLMRAwDgYDVQQIDAdFbmdsYW5kMRIwEAYDVQQKDAlUU0NQIFRlc3QxGjAYBgNVBAMMEVRTQ1AgVGVzdCBSb290IENBMB4XDTE1MTIxODA3NTgxOFoXDTM1MTIxMzA3NTgxOFowTzELMAkGA1UEBhMCVUsxEDAOBgNVBAgMB0VuZ2xhbmQxEjAQBgNVBAoMCVRTQ1AgVGVzdDEaMBgGA1UEAwwRVFNDUCBUZXN0IFJvb3QgQ0EwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCwJHjSIV2YE31STJk/bvGrTY5O949l2TnA0nt3nlIC+oe2O+dLnMIwZsSvZwZdGMjvc25AoQHJAiX8zeq/0AVMz9mhm9y239ziJBnSdP4eLBEntPFtZZooFQUV9uXHejIJs3czZmQsOBz9Ko5cL3fC0qZYXeNL14MGpAWQ0EbEMaz0uDSz3BCcJBLSgXmCEBCbXNP7mCt9vWoa0nE8HQUVmiB+SL9ltCLUojVEB9EsssH1Wo3rFR03Wk9ZbMcmmn8Av1ZLexFDD2TKhGcNHNfguB5rf+Sc/Vt45pSPemE60ro1ej4n/wwpyFM+5w6rOYqZTaBiRwzgTwYoS1JcgKnu0ACIxYqpIhxeBbNT6rA45bBvTQOCNdUWALyRkTiLYiX5HKbrrhg2ZsmX62GwEPtm+okJIQLmQp7nLfQMDONrETrBnj/D2aUo6LuwtONBD35c/hLKTMVCDIBOS35Js55GJr2Y+dhG5ly0dKoMnu0fXScrEYQzjJ1Pbv7zvpO7RBV0El8Qg8AfR2ZAD/UlugRnEDCyVhAXz8g333r0whz3LvacHTwMJoMXFXt9yVnt3ivrZ2NGLcvSNx2ZjBU7Y2EWJoEKGl49+idqmHK1Equ3dM+FNiCNX2PCEL3DGs8GpuzFFM9d9q863xCCNRecgG8rrQKSo328Q656g3iSUqFcCwIDAQABo2MwYTAdBgNVHQ4EFgQUG1wKx1KEl6W80lEsseFU7LXvBFcwHwYDVR0jBBgwFoAUG1wKx1KEl6W80lEsseFU7LXvBFcwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBADNoTWI0fdkdQW8/knVy+pEYh4y4f/9rit55MznmMsuOTHv9svNH9AMw/ZBvsfu4fEcXQyhmeonRMPdsz26ZigxG4k2wcN9fV2VMI3MlIbefCJZhIS89c3kyBF5MhbnaWxvP95nfa41cfbsXnSrj31VNMSXiA6YegbiL/v/0IkUd1mwXcXTcyxbkMLuTORgA6WiPpHVMN//YnQSMWa9ukh0uHsAuoDc2NvteXZsQkpOdZdJB4pIL2t68agyGy1Wv78jiaoxVbfYL4T3TJMxIGFSZFoV+1SYmVsfPxrSQ7vopT6y61r9c/b2fTPaHO+22pFx8lNsHII5kXpWQZIpRHgqjydN/VtnaC6dKq1lPvQTplzQVfCNWGaa/BddinGNV2qwo2a1QnEv7/6t1Gtxs5rte31aCNkfIx/mThk10fMmwJK9ECWKT/+X5iWpydI3zBIE+OvO0MKpaOeVVz1Jehk
ZxGWP+qduF8lgL7Hs4osQNbQIu41twarpSjVCEm/FLVqo8wLmTi2Y5a7QTANeNLdyAKxjTJn3uuAvVYUUHzKOXgKF/X0tCZrUsH3//2MW3nqenN0ldXIzf7OjnVcbv2iKUzqYFzJHYtAuWHbew/kj2TvWeFFzdigLYjfnfZvHQ5sPxwrx6YpDeNf8inj48oEj6Raos0ClF0nmVg2eLMe9f</xd:EncapsulatedX509Certificate></xd:CertificateValues></xd:UnsignedSignatureProperties></xd:UnsignedProperties></xd:QualifyingProperties></Object></Signature>
\ No newline at end of file diff --git a/sax/source/expatwrap/expwrap.component b/sax/source/expatwrap/expwrap.component new file mode 100644 index 0000000000..1f72eccf31 --- /dev/null +++ b/sax/source/expatwrap/expwrap.component @@ -0,0 +1,38 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ --> + +<component loader="com.sun.star.loader.SharedLibrary" environment="@CPPU_ENV@" + xmlns="http://openoffice.org/2010/uno-components"> + <implementation name="com.sun.star.comp.extensions.xml.sax.ParserExpat" + constructor="com_sun_star_comp_extensions_xml_sax_ParserExpat_get_implementation"> + <service name="com.sun.star.xml.sax.Parser"/> + </implementation> + <implementation name="com.sun.star.extensions.xml.sax.Writer" + constructor="com_sun_star_extensions_xml_sax_Writer_get_implementation"> + <service name="com.sun.star.xml.sax.Writer"/> + </implementation> + <implementation name="com.sun.star.comp.extensions.xml.sax.FastParser" + constructor="com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation"> + <service name="com.sun.star.xml.sax.FastParser"/> + </implementation> + <implementation name="com.sun.star.comp.extensions.xml.sax.LegacyFastParser" + constructor="com_sun_star_comp_extensions_xml_sax_LegacyFastParser_get_implementation"> + <service name="com.sun.star.xml.sax.LegacyFastParser"/> + </implementation> +</component> diff --git a/sax/source/expatwrap/sax_expat.cxx b/sax/source/expatwrap/sax_expat.cxx new file mode 100644 index 0000000000..9a82b87036 --- /dev/null +++ b/sax/source/expatwrap/sax_expat.cxx @@ -0,0 +1,957 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <string.h> +#include <cassert> +#include <memory> +#include <mutex> +#include <utility> +#include <string_view> +#include <vector> + + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> +#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/io/IOException.hpp> +#include <com/sun/star/io/XSeekable.hpp> +#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp> + +#include <comphelper/attributelist.hxx> +#include <cppuhelper/weak.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <rtl/ref.hxx> +#include <sal/log.hxx> + +#include <expat.h> +#include <xml2utf.hxx> + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star::io; + + +namespace { + +#define XML_CHAR_TO_OUSTRING(x) OUString(x , strlen( x ), RTL_TEXTENCODING_UTF8) +#define XML_CHAR_N_TO_USTRING(x,n) OUString(x,n, RTL_TEXTENCODING_UTF8 ) + + +/* +* The following macro encapsulates any call to an event handler. +* It ensures, that exceptions thrown by the event handler are +* treated properly. +*/ +#define CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pThis,call) \ + if( ! 
pThis->bExceptionWasThrown ) { \ + try {\ + pThis->call;\ + }\ + catch( const SAXParseException &e ) {\ + callErrorHandler( pThis , e );\ + }\ + catch( const SAXException &e ) {\ + callErrorHandler( pThis , SAXParseException(\ + e.Message, \ + e.Context, \ + e.WrappedException,\ + pThis->rDocumentLocator->getPublicId(),\ + pThis->rDocumentLocator->getSystemId(),\ + pThis->rDocumentLocator->getLineNumber(),\ + pThis->rDocumentLocator->getColumnNumber()\ + ) );\ + }\ + catch( const css::uno::RuntimeException &e ) {\ + pThis->bExceptionWasThrown = true; \ + pThis->bRTExceptionWasThrown = true; \ + pImpl->rtexception = e; \ + }\ + catch( const css::uno::Exception &e ) {\ + pThis->bExceptionWasThrown = true; \ + pThis->bRTExceptionWasThrown = true; \ + pImpl->rtexception = WrappedTargetRuntimeException("Non-runtime UNO exception caught during parse", e.Context, css::uno::Any(e)); \ + }\ + }\ + ((void)0) + + +class SaxExpatParser_Impl; + +// This class implements the external Parser interface +class SaxExpatParser + : public WeakImplHelper< XInitialization + , XServiceInfo + , XParser > +{ + +public: + SaxExpatParser(); + + // css::lang::XInitialization: + virtual void SAL_CALL initialize(css::uno::Sequence<css::uno::Any> const& rArguments) override; + + // The SAX-Parser-Interface + virtual void SAL_CALL parseStream( const InputSource& structSource) override; + virtual void SAL_CALL setDocumentHandler(const css::uno::Reference< XDocumentHandler > & xHandler) override; + + virtual void SAL_CALL setErrorHandler(const css::uno::Reference< XErrorHandler > & xHandler) override; + virtual void SAL_CALL setDTDHandler(const css::uno::Reference < XDTDHandler > & xHandler) override; + virtual void SAL_CALL setEntityResolver(const css::uno::Reference< XEntityResolver >& xResolver) override; + + virtual void SAL_CALL setLocale( const Locale &locale ) override; + +public: // XServiceInfo + OUString SAL_CALL getImplementationName() override; + css::uno::Sequence< OUString > SAL_CALL 
getSupportedServiceNames() override; + sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; + +private: + std::unique_ptr<SaxExpatParser_Impl> m_pImpl; +}; + + +// Entity binds all information needed for a single file +struct Entity +{ + InputSource structSource; + XML_Parser pParser; + sax_expatwrap::XMLFile2UTFConverter converter; +}; + + +class SaxExpatParser_Impl +{ +public: // module scope + std::mutex aMutex; + bool m_bEnableDoS; // fdo#60471 thank you Adobe Illustrator + + css::uno::Reference< XDocumentHandler > rDocumentHandler; + css::uno::Reference< XExtendedDocumentHandler > rExtendedDocumentHandler; + + css::uno::Reference< XErrorHandler > rErrorHandler; + css::uno::Reference< XDTDHandler > rDTDHandler; + css::uno::Reference< XEntityResolver > rEntityResolver; + css::uno::Reference < XLocator > rDocumentLocator; + + + rtl::Reference < comphelper::AttributeList > rAttrList; + + // External entity stack + std::vector<struct Entity> vecEntity; + void pushEntity( Entity &&entity ) + { vecEntity.push_back( std::move(entity) ); } + void popEntity() + { vecEntity.pop_back( ); } + struct Entity &getEntity() + { return vecEntity.back(); } + + + // Exception cannot be thrown through the C-XmlParser (possible resource leaks), + // therefore the exception must be saved somewhere. 
+ SAXParseException exception; + css::uno::RuntimeException rtexception; + bool bExceptionWasThrown; + bool bRTExceptionWasThrown; + +public: + SaxExpatParser_Impl() + : m_bEnableDoS(false) + , bExceptionWasThrown(false) + , bRTExceptionWasThrown(false) + { + } + + // the C-Callbacks for the expat parser + void static callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts); + void static callbackEndElement(void *userData, const XML_Char *name); + void static callbackCharacters( void *userData , const XML_Char *s , int nLen ); + void static callbackProcessingInstruction( void *userData , + const XML_Char *sTarget , + const XML_Char *sData ); + + void static callbackEntityDecl( void *userData , + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName); + + void static callbackNotationDecl( void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); + + bool static callbackExternalEntityRef( XML_Parser parser, + const XML_Char *openEntityNames, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); + + int static callbackUnknownEncoding(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info); + + void static callbackDefault( void *userData, const XML_Char *s, int len); + + void static callbackStartCDATA( void *userData ); + void static callbackEndCDATA( void *userData ); + void static callbackComment( void *userData , const XML_Char *s ); + void static callErrorHandler( SaxExpatParser_Impl *pImpl , const SAXParseException &e ); + +public: + void parse(); +}; + +extern "C" +{ + static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts) + { + SaxExpatParser_Impl::callbackStartElement(userData,name,atts); + } + static void 
call_callbackEndElement(void *userData, const XML_Char *name) + { + SaxExpatParser_Impl::callbackEndElement(userData,name); + } + static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen ) + { + SaxExpatParser_Impl::callbackCharacters(userData,s,nLen); + } + static void call_callbackProcessingInstruction(void *userData,const XML_Char *sTarget,const XML_Char *sData ) + { + SaxExpatParser_Impl::callbackProcessingInstruction(userData,sTarget,sData ); + } + static void call_callbackEntityDecl(void *userData , + const XML_Char *entityName, + int is_parameter_entity, + const XML_Char *value, + int value_length, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) + { + SaxExpatParser_Impl::callbackEntityDecl(userData, entityName, + is_parameter_entity, value, value_length, + base, systemId, publicId, notationName); + } + static void call_callbackNotationDecl(void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) + { + SaxExpatParser_Impl::callbackNotationDecl(userData,notationName,base,systemId,publicId); + } + static int call_callbackExternalEntityRef(XML_Parser parser, + const XML_Char *openEntityNames, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) + { + return SaxExpatParser_Impl::callbackExternalEntityRef(parser,openEntityNames,base,systemId,publicId); + } + static int call_callbackUnknownEncoding(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info) + { + return SaxExpatParser_Impl::callbackUnknownEncoding(encodingHandlerData,name,info); + } + static void call_callbackDefault( void *userData, const XML_Char *s, int len) + { + SaxExpatParser_Impl::callbackDefault(userData,s,len); + } + static void call_callbackStartCDATA( void *userData ) + { + SaxExpatParser_Impl::callbackStartCDATA(userData); + } + static void call_callbackEndCDATA( void *userData ) + { + 
SaxExpatParser_Impl::callbackEndCDATA(userData); + } + static void call_callbackComment( void *userData , const XML_Char *s ) + { + SaxExpatParser_Impl::callbackComment(userData,s); + } +} + + +// LocatorImpl + +class LocatorImpl : + public WeakImplHelper< XLocator, css::io::XSeekable > + // should use a different interface for stream positions! +{ +public: + explicit LocatorImpl(SaxExpatParser_Impl *p) + : m_pParser(p) + { + } + +public: //XLocator + virtual sal_Int32 SAL_CALL getColumnNumber() override + { + return XML_GetCurrentColumnNumber( m_pParser->getEntity().pParser ); + } + virtual sal_Int32 SAL_CALL getLineNumber() override + { + return XML_GetCurrentLineNumber( m_pParser->getEntity().pParser ); + } + virtual OUString SAL_CALL getPublicId() override + { + return m_pParser->getEntity().structSource.sPublicId; + } + virtual OUString SAL_CALL getSystemId() override + { + return m_pParser->getEntity().structSource.sSystemId; + } + + // XSeekable (only for getPosition) + + virtual void SAL_CALL seek( sal_Int64 ) override + { + } + virtual sal_Int64 SAL_CALL getPosition() override + { + return XML_GetCurrentByteIndex( m_pParser->getEntity().pParser ); + } + virtual ::sal_Int64 SAL_CALL getLength() override + { + return 0; + } + +private: + + SaxExpatParser_Impl *m_pParser; +}; + + +SaxExpatParser::SaxExpatParser( ) +{ + m_pImpl.reset( new SaxExpatParser_Impl ); + + rtl::Reference<LocatorImpl> pLoc = new LocatorImpl( m_pImpl.get() ); + m_pImpl->rDocumentLocator = pLoc; + + // Performance-improvement; handing out the same object with every call of + // the startElement callback is allowed (see sax-specification): + m_pImpl->rAttrList = new comphelper::AttributeList; + + m_pImpl->bExceptionWasThrown = false; + m_pImpl->bRTExceptionWasThrown = false; +} + +// css::lang::XInitialization: +void SAL_CALL +SaxExpatParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments) +{ + // possible arguments: a string "DoSmeplease" + if (rArguments.hasElements()) 
+ { + OUString str; + if ((rArguments[0] >>= str) && "DoSmeplease" == str) + { + std::unique_lock guard( m_pImpl->aMutex ); + m_pImpl->m_bEnableDoS = true; + } + } +} + +class ParserCleanup +{ +private: + SaxExpatParser_Impl& m_rParser; + XML_Parser m_xmlParser; +public: + ParserCleanup(SaxExpatParser_Impl& rParser, XML_Parser xmlParser) + : m_rParser(rParser) + , m_xmlParser(xmlParser) + { + } + ~ParserCleanup() + { + m_rParser.popEntity(); + //XML_ParserFree accepts a null arg + XML_ParserFree(m_xmlParser); + } +}; + +/*************** +* +* parseStream does Parser-startup initializations. The SaxExpatParser_Impl::parse() method does +* the file-specific initialization work. (During a parser run, external files may be opened) +* +****************/ +void SaxExpatParser::parseStream( const InputSource& structSource) +{ + // Only one text at one time + std::unique_lock guard( m_pImpl->aMutex ); + + + struct Entity entity; + entity.structSource = structSource; + + if( ! entity.structSource.aInputStream.is() ) + { + throw SAXException("No input source", + css::uno::Reference< css::uno::XInterface > () , css::uno::Any() ); + } + + entity.converter.setInputStream( entity.structSource.aInputStream ); + if( !entity.structSource.sEncoding.isEmpty() ) + { + entity.converter.setEncoding( + OUStringToOString( entity.structSource.sEncoding , RTL_TEXTENCODING_ASCII_US ) ); + } + + // create parser with proper encoding + entity.pParser = XML_ParserCreate( nullptr ); + if( ! 
entity.pParser ) + { + throw SAXException("Couldn't create parser", + css::uno::Reference< css::uno::XInterface > (), css::uno::Any() ); + } + + // set all necessary C-Callbacks + XML_SetUserData( entity.pParser, m_pImpl.get() ); + XML_SetElementHandler( entity.pParser , + call_callbackStartElement , + call_callbackEndElement ); + XML_SetCharacterDataHandler( entity.pParser , call_callbackCharacters ); + XML_SetProcessingInstructionHandler(entity.pParser , + call_callbackProcessingInstruction ); + if (!m_pImpl->m_bEnableDoS) + { + XML_SetEntityDeclHandler(entity.pParser, call_callbackEntityDecl); + } + XML_SetNotationDeclHandler( entity.pParser, call_callbackNotationDecl ); + XML_SetExternalEntityRefHandler( entity.pParser, + call_callbackExternalEntityRef); + XML_SetUnknownEncodingHandler( entity.pParser, call_callbackUnknownEncoding ,nullptr); + + if( m_pImpl->rExtendedDocumentHandler.is() ) { + + // These handlers just delegate calls to the ExtendedHandler. If no extended handler is + // given, these callbacks can be ignored + XML_SetDefaultHandlerExpand( entity.pParser, call_callbackDefault ); + XML_SetCommentHandler( entity.pParser, call_callbackComment ); + XML_SetCdataSectionHandler( entity.pParser , + call_callbackStartCDATA , + call_callbackEndCDATA ); + } + + + m_pImpl->exception = SAXParseException(); + auto const xmlParser = entity.pParser; + m_pImpl->pushEntity( std::move(entity) ); + + ParserCleanup aEnsureFree(*m_pImpl, xmlParser); + + // start the document + if( m_pImpl->rDocumentHandler.is() ) { + m_pImpl->rDocumentHandler->setDocumentLocator( m_pImpl->rDocumentLocator ); + m_pImpl->rDocumentHandler->startDocument(); + } + + m_pImpl->parse(); + + // finish document + if( m_pImpl->rDocumentHandler.is() ) { + m_pImpl->rDocumentHandler->endDocument(); + } +} + +void SaxExpatParser::setDocumentHandler(const css::uno::Reference< XDocumentHandler > & xHandler) +{ + m_pImpl->rDocumentHandler = xHandler; + m_pImpl->rExtendedDocumentHandler = + 
css::uno::Reference< XExtendedDocumentHandler >( xHandler , css::uno::UNO_QUERY ); +} + +void SaxExpatParser::setErrorHandler(const css::uno::Reference< XErrorHandler > & xHandler) +{ + m_pImpl->rErrorHandler = xHandler; +} + +void SaxExpatParser::setDTDHandler(const css::uno::Reference< XDTDHandler > & xHandler) +{ + m_pImpl->rDTDHandler = xHandler; +} + +void SaxExpatParser::setEntityResolver(const css::uno::Reference < XEntityResolver > & xResolver) +{ + m_pImpl->rEntityResolver = xResolver; +} + + +void SaxExpatParser::setLocale( const Locale & ) +{ + // not implemented +} + +// XServiceInfo +OUString SaxExpatParser::getImplementationName() +{ + return "com.sun.star.comp.extensions.xml.sax.ParserExpat"; +} + +// XServiceInfo +sal_Bool SaxExpatParser::supportsService(const OUString& ServiceName) +{ + return cppu::supportsService(this, ServiceName); +} + +// XServiceInfo +css::uno::Sequence< OUString > SaxExpatParser::getSupportedServiceNames() +{ + return { "com.sun.star.xml.sax.Parser" }; +} + + +/*--------------------------------------- +* +* Helper functions and classes +* +* +*-------------------------------------------*/ +OUString getErrorMessage( XML_Error xmlE, std::u16string_view sSystemId , sal_Int32 nLine ) +{ + OUString Message; + if( XML_ERROR_NONE == xmlE ) { + Message = "No"; + } + else if( XML_ERROR_NO_MEMORY == xmlE ) { + Message = "no memory"; + } + else if( XML_ERROR_SYNTAX == xmlE ) { + Message = "syntax"; + } + else if( XML_ERROR_NO_ELEMENTS == xmlE ) { + Message = "no elements"; + } + else if( XML_ERROR_INVALID_TOKEN == xmlE ) { + Message = "invalid token"; + } + else if( XML_ERROR_UNCLOSED_TOKEN == xmlE ) { + Message = "unclosed token"; + } + else if( XML_ERROR_PARTIAL_CHAR == xmlE ) { + Message = "partial char"; + } + else if( XML_ERROR_TAG_MISMATCH == xmlE ) { + Message = "tag mismatch"; + } + else if( XML_ERROR_DUPLICATE_ATTRIBUTE == xmlE ) { + Message = "duplicate attribute"; + } + else if( XML_ERROR_JUNK_AFTER_DOC_ELEMENT == xmlE ) { + 
Message = "junk after doc element"; + } + else if( XML_ERROR_PARAM_ENTITY_REF == xmlE ) { + Message = "parameter entity reference"; + } + else if( XML_ERROR_UNDEFINED_ENTITY == xmlE ) { + Message = "undefined entity"; + } + else if( XML_ERROR_RECURSIVE_ENTITY_REF == xmlE ) { + Message = "recursive entity reference"; + } + else if( XML_ERROR_ASYNC_ENTITY == xmlE ) { + Message = "async entity"; + } + else if( XML_ERROR_BAD_CHAR_REF == xmlE ) { + Message = "bad char reference"; + } + else if( XML_ERROR_BINARY_ENTITY_REF == xmlE ) { + Message = "binary entity reference"; + } + else if( XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF == xmlE ) { + Message = "attribute external entity reference"; + } + else if( XML_ERROR_MISPLACED_XML_PI == xmlE ) { + Message = "misplaced xml processing instruction"; + } + else if( XML_ERROR_UNKNOWN_ENCODING == xmlE ) { + Message = "unknown encoding"; + } + else if( XML_ERROR_INCORRECT_ENCODING == xmlE ) { + Message = "incorrect encoding"; + } + else if( XML_ERROR_UNCLOSED_CDATA_SECTION == xmlE ) { + Message = "unclosed cdata section"; + } + else if( XML_ERROR_EXTERNAL_ENTITY_HANDLING == xmlE ) { + Message = "external entity reference"; + } + else if( XML_ERROR_NOT_STANDALONE == xmlE ) { + Message = "not standalone"; + } + + OUString str = OUString::Concat("[") + + sSystemId + + " line " + + OUString::number( nLine ) + + "]: " + + Message + + "error"; + + return str; +} + + +// starts parsing with actual parser ! +void SaxExpatParser_Impl::parse( ) +{ + const int nBufSize = 16*1024; + + int nRead = nBufSize; + css::uno::Sequence< sal_Int8 > seqOut(nBufSize); + + while( nRead ) { + nRead = getEntity().converter.readAndConvert( seqOut , nBufSize ); + + bool bContinue(false); + + if( ! 
nRead ) { + // last call - must return OK + XML_Status const ret = XML_Parse( getEntity().pParser, + reinterpret_cast<const char *>(seqOut.getConstArray()), + 0 , + 1 ); + if (ret == XML_STATUS_OK) { + break; + } + } else { + bContinue = ( XML_Parse( getEntity().pParser, + reinterpret_cast<const char *>(seqOut.getConstArray()), + nRead, + 0 ) != XML_STATUS_ERROR ); + } + + if( ! bContinue || bExceptionWasThrown ) { + + if ( bRTExceptionWasThrown ) + throw rtexception; + + // Error during parsing ! + XML_Error xmlE = XML_GetErrorCode( getEntity().pParser ); + OUString sSystemId = rDocumentLocator->getSystemId(); + sal_Int32 nLine = rDocumentLocator->getLineNumber(); + + SAXParseException aExcept( + getErrorMessage(xmlE , sSystemId, nLine) , + css::uno::Reference< css::uno::XInterface >(), + css::uno::Any( &exception , cppu::UnoType<decltype(exception)>::get() ), + rDocumentLocator->getPublicId(), + rDocumentLocator->getSystemId(), + rDocumentLocator->getLineNumber(), + rDocumentLocator->getColumnNumber() + ); + + if( rErrorHandler.is() ) { + + // error handler is set, so the handler may throw the exception + css::uno::Any a; + a <<= aExcept; + rErrorHandler->fatalError( a ); + } + + // Error handler has not thrown an exception, but parsing cannot go on, + // so an exception MUST be thrown. + throw aExcept; + } // if( ! 
bContinue ) + } // while +} + + +// The C-Callbacks + + +void SaxExpatParser_Impl::callbackStartElement( void *pvThis , + const XML_Char *pwName , + const XML_Char **awAttributes ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + + if( !pImpl->rDocumentHandler.is() ) + return; + + int i = 0; + pImpl->rAttrList->Clear(); + + while( awAttributes[i] ) { + assert(awAttributes[i+1]); + pImpl->rAttrList->AddAttribute( + XML_CHAR_TO_OUSTRING( awAttributes[i] ) , + XML_CHAR_TO_OUSTRING( awAttributes[i+1] ) ); + i +=2; + } + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( + pImpl , + rDocumentHandler->startElement( XML_CHAR_TO_OUSTRING( pwName ) , + pImpl->rAttrList ) ); +} + +void SaxExpatParser_Impl::callbackEndElement( void *pvThis , const XML_Char *pwName ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + + if( pImpl->rDocumentHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rDocumentHandler->endElement( XML_CHAR_TO_OUSTRING( pwName ) ) ); + } +} + + +void SaxExpatParser_Impl::callbackCharacters( void *pvThis , const XML_Char *s , int nLen ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + + if( pImpl->rDocumentHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl , + rDocumentHandler->characters( XML_CHAR_N_TO_USTRING(s,nLen) ) ); + } +} + +void SaxExpatParser_Impl::callbackProcessingInstruction( void *pvThis, + const XML_Char *sTarget , + const XML_Char *sData ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + if( pImpl->rDocumentHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( + pImpl , + rDocumentHandler->processingInstruction( XML_CHAR_TO_OUSTRING( sTarget ), + XML_CHAR_TO_OUSTRING( sData ) ) ); + } +} + + +void SaxExpatParser_Impl::callbackEntityDecl( + void *pvThis, const XML_Char *entityName, + SAL_UNUSED_PARAMETER int /*is_parameter_entity*/, + const XML_Char *value, 
SAL_UNUSED_PARAMETER int /*value_length*/, + SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId, + const XML_Char *publicId, const XML_Char *notationName) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + if (value) { // value != 0 means internal entity + SAL_INFO("sax","SaxExpatParser: internal entity declaration, stopping"); + XML_StopParser(pImpl->getEntity().pParser, XML_FALSE); + pImpl->exception = SAXParseException( + "SaxExpatParser: internal entity declaration, stopping", + nullptr, css::uno::Any(), + pImpl->rDocumentLocator->getPublicId(), + pImpl->rDocumentLocator->getSystemId(), + pImpl->rDocumentLocator->getLineNumber(), + pImpl->rDocumentLocator->getColumnNumber() ); + pImpl->bExceptionWasThrown = true; + } else { + if( pImpl->rDTDHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( + pImpl , + rDTDHandler->unparsedEntityDecl( + XML_CHAR_TO_OUSTRING( entityName ), + XML_CHAR_TO_OUSTRING( publicId ) , + XML_CHAR_TO_OUSTRING( systemId ) , + XML_CHAR_TO_OUSTRING( notationName ) ) ); + } + } +} + +void SaxExpatParser_Impl::callbackNotationDecl( + void *pvThis, const XML_Char *notationName, + SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId, + const XML_Char *publicId) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + if( pImpl->rDTDHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rDTDHandler->notationDecl( XML_CHAR_TO_OUSTRING( notationName ) , + XML_CHAR_TO_OUSTRING( publicId ) , + XML_CHAR_TO_OUSTRING( systemId ) ) ); + } + +} + + +bool SaxExpatParser_Impl::callbackExternalEntityRef( + XML_Parser parser, const XML_Char *context, + SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId, + const XML_Char *publicId) +{ + bool bOK = true; + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(XML_GetUserData( parser )); + + struct Entity entity; + + if( pImpl->rEntityResolver.is() ) { + try + 
{ + entity.structSource = pImpl->rEntityResolver->resolveEntity( + XML_CHAR_TO_OUSTRING( publicId ) , + XML_CHAR_TO_OUSTRING( systemId ) ); + } + catch( const SAXParseException & e ) + { + pImpl->exception = e; + bOK = false; + } + catch( const SAXException & e ) + { + pImpl->exception = SAXParseException( + e.Message , e.Context , e.WrappedException , + pImpl->rDocumentLocator->getPublicId(), + pImpl->rDocumentLocator->getSystemId(), + pImpl->rDocumentLocator->getLineNumber(), + pImpl->rDocumentLocator->getColumnNumber() ); + bOK = false; + } + } + + if( entity.structSource.aInputStream.is() ) { + entity.pParser = XML_ExternalEntityParserCreate( parser , context, nullptr ); + if( ! entity.pParser ) + { + return false; + } + + entity.converter.setInputStream( entity.structSource.aInputStream ); + auto const xmlParser = entity.pParser; + pImpl->pushEntity( std::move(entity) ); + try + { + pImpl->parse(); + } + catch( const SAXParseException & e ) + { + pImpl->exception = e; + bOK = false; + } + catch( const IOException &e ) + { + pImpl->exception.WrappedException <<= e; + bOK = false; + } + catch( const css::uno::RuntimeException &e ) + { + pImpl->exception.WrappedException <<=e; + bOK = false; + } + + pImpl->popEntity(); + + XML_ParserFree( xmlParser ); + } + + return bOK; +} + +int SaxExpatParser_Impl::callbackUnknownEncoding( + SAL_UNUSED_PARAMETER void * /*encodingHandlerData*/, + SAL_UNUSED_PARAMETER const XML_Char * /*name*/, + SAL_UNUSED_PARAMETER XML_Encoding * /*info*/) +{ + return 0; +} + +void SaxExpatParser_Impl::callbackDefault( void *pvThis, const XML_Char *s, int len) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rExtendedDocumentHandler->unknown( XML_CHAR_N_TO_USTRING( s ,len) ) ); +} + +void SaxExpatParser_Impl::callbackComment( void *pvThis , const XML_Char *s ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + 
CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rExtendedDocumentHandler->comment( XML_CHAR_TO_OUSTRING( s ) ) ); +} + +void SaxExpatParser_Impl::callbackStartCDATA( void *pvThis ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, rExtendedDocumentHandler->startCDATA() ); +} + + +void SaxExpatParser_Impl::callErrorHandler( SaxExpatParser_Impl *pImpl , + const SAXParseException & e ) +{ + try + { + if( pImpl->rErrorHandler.is() ) { + css::uno::Any a; + a <<= e; + pImpl->rErrorHandler->error( a ); + } + else { + pImpl->exception = e; + pImpl->bExceptionWasThrown = true; + } + } + catch( const SAXParseException & ex ) { + pImpl->exception = ex; + pImpl->bExceptionWasThrown = true; + } + catch( const SAXException & ex ) { + pImpl->exception = SAXParseException( + ex.Message, + ex.Context, + ex.WrappedException, + pImpl->rDocumentLocator->getPublicId(), + pImpl->rDocumentLocator->getSystemId(), + pImpl->rDocumentLocator->getLineNumber(), + pImpl->rDocumentLocator->getColumnNumber() + ); + pImpl->bExceptionWasThrown = true; + } +} + +void SaxExpatParser_Impl::callbackEndCDATA( void *pvThis ) +{ + SaxExpatParser_Impl *pImpl = static_cast<SaxExpatParser_Impl*>(pvThis); + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pImpl,rExtendedDocumentHandler->endCDATA() ); +} + +} // namespace + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_comp_extensions_xml_sax_ParserExpat_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new SaxExpatParser); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx new file mode 100644 index 0000000000..55608101fa --- /dev/null +++ b/sax/source/expatwrap/saxwriter.cxx @@ -0,0 +1,1464 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * 
This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <string.h> + +#include <cassert> +#include <set> +#include <stack> +#include <vector> + +#include <com/sun/star/io/IOException.hpp> +#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp> +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> +#include <com/sun/star/util/XCloneable.hpp> +#include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp> +#include <com/sun/star/xml/sax/XWriter.hpp> + +#include <cppuhelper/exc_hlp.hxx> +#include <cppuhelper/weak.hxx> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> + +#include <osl/diagnose.h> +#include <rtl/character.hxx> +#include <sal/log.hxx> + +#include <memory> + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star::util; +using namespace ::com::sun::star::io; + +#define LINEFEED 10 +#define SEQUENCESIZE 1024 +#define MAXCOLUMNCOUNT 72 + +/****** +* +* +* Character conversion functions +* +* +*****/ + +namespace +{ +enum SaxInvalidCharacterError +{ + SAX_NONE, + 
SAX_WARNING, + SAX_ERROR +}; + +// Stuff for custom entity names +struct ReplacementPair +{ + OUString name; + OUString replacement; +}; +inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs) +{ + return lhs.replacement.compareTo(rhs.replacement) < 0; +} + +class SaxWriterHelper +{ +#ifdef DBG_UTIL +public: + ::std::stack<OUString> m_DebugStartedElements; +#endif + +private: + Reference<XOutputStream> m_out; + Sequence<sal_Int8> m_Sequence; + sal_Int8* mp_Sequence; + + sal_Int32 nLastLineFeedPos; // is negative after writing a sequence + sal_uInt32 nCurrentPos; + bool m_bStartElementFinished; + + std::vector<ReplacementPair> m_Replacements; + + /// @throws SAXException + sal_uInt32 writeSequence(); + + // use only if to insert the bytes more space in the sequence is needed and + // so the sequence has to write out and reset rPos to 0 + // writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE) + /// @throws SAXException + void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes, + sal_uInt32 nBytesCount); + /// @throws SAXException + bool convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, bool bDoNormalization, + bool bNormalizeWhitespace, sal_Int8* pTarget, sal_uInt32& rPos); + /// @throws SAXException + void FinishStartElement(); + + // Search for the correct replacement + const ReplacementPair* findXMLReplacement(const sal_Unicode* pStr, sal_Int32 nStrLen); + +public: + explicit SaxWriterHelper(Reference<XOutputStream> const& m_TempOut) + : m_out(m_TempOut) + , m_Sequence(SEQUENCESIZE) + , mp_Sequence(nullptr) + , nLastLineFeedPos(0) + , nCurrentPos(0) + , m_bStartElementFinished(true) + { + OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size too small"); + mp_Sequence = m_Sequence.getArray(); + } + ~SaxWriterHelper() + { + OSL_ENSURE(!nCurrentPos, "cached Sequence not written"); + OSL_ENSURE(m_bStartElementFinished, "StartElement not completely written"); + } + + /// @throws 
SAXException + void insertIndentation(sal_uInt32 m_nLevel); + + // returns whether it works correct or invalid characters were in the string + // If there are invalid characters in the string it returns sal_False. + // Than the calling method has to throw the needed Exception. + /// @throws SAXException + bool writeString(const OUString& rWriteOutString, bool bDoNormalization, + bool bNormalizeWhitespace); + + sal_uInt32 GetLastColumnCount() const noexcept + { + return static_cast<sal_uInt32>(nCurrentPos - nLastLineFeedPos); + } + + /// @throws SAXException + void startDocument(); + + // returns whether it works correct or invalid characters were in the strings + // If there are invalid characters in one of the strings it returns sal_False. + // Than the calling method has to throw the needed Exception. + /// @throws SAXException + SaxInvalidCharacterError startElement(const OUString& rName, + const Reference<XAttributeList>& xAttribs); + /// @throws SAXException + bool FinishEmptyElement(); + + // returns whether it works correct or invalid characters were in the string + // If there are invalid characters in the string it returns sal_False. + // Than the calling method has to throw the needed Exception. + /// @throws SAXException + bool endElement(const OUString& rName); + /// @throws SAXException + void endDocument(); + + // returns whether it works correct or invalid characters were in the strings + // If there are invalid characters in the string it returns sal_False. + // Than the calling method has to throw the needed Exception. + /// @throws SAXException + bool processingInstruction(const OUString& rTarget, const OUString& rData); + /// @throws SAXException + void startCDATA(); + /// @throws SAXException + void endCDATA(); + + // returns whether it works correct or invalid characters were in the strings + // If there are invalid characters in the string it returns sal_False. + // Than the calling method has to throw the needed Exception. 
+ /// @throws SAXException + bool comment(const OUString& rComment); + + /// @throws SAXException + void clearBuffer(); + + // Use custom entity names + void setCustomEntityNames( + const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& + replacements); + + // Calculate length for convertToXML + sal_Int32 calcXMLByteLength(const OUString& rStr, bool bDoNormalization, + bool bNormalizeWhitespace); +}; + +const bool g_bValidCharsBelow32[32] = { + // clang-format off +// 0 1 2 3 4 5 6 7 + false, false, false, false, false, false, false, false, //0 + false, true, true, false, false, true, false, false, //8 + false, false, false, false, false, false, false, false, //16 + false, false, false, false, false, false, false, false + // clang-format on +}; + +bool IsInvalidChar(const sal_Unicode aChar) +{ + bool bRet(false); + // check first for the most common characters + if (aChar < 32 || aChar >= 0xd800) + bRet = ((aChar < 32 && !g_bValidCharsBelow32[aChar]) || aChar == 0xffff || aChar == 0xfffe); + return bRet; +} + +/******** +* write through to the output stream +* +*****/ +sal_uInt32 SaxWriterHelper::writeSequence() +{ + try + { + m_out->writeBytes(m_Sequence); + } + catch (const IOException&) + { + css::uno::Any anyEx = cppu::getCaughtException(); + throw SAXException("IO exception during writing", Reference<XInterface>(), anyEx); + } + nLastLineFeedPos -= SEQUENCESIZE; + return 0; +} + +void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes, + sal_uInt32 nBytesCount) +{ + OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod"); + sal_uInt32 nCount(SEQUENCESIZE - rPos); + memcpy(&(pTarget[rPos]), pBytes, nCount); + + OSL_ENSURE(rPos + nCount == SEQUENCESIZE, "the position should be the at the end"); + + rPos = writeSequence(); + sal_uInt32 nRestCount(nBytesCount - nCount); + if ((rPos + nRestCount) <= SEQUENCESIZE) + { + memcpy(&(pTarget[rPos]), &pBytes[nCount], nRestCount); + rPos 
+= nRestCount; + } + else + AddBytes(pTarget, rPos, &pBytes[nCount], nRestCount); +} + +void SaxWriterHelper::setCustomEntityNames( + const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements) +{ + m_Replacements.resize(replacements.size()); + for (size_t i = 0; i < replacements.size(); ++i) + { + m_Replacements[i].name = replacements[i].First; + m_Replacements[i].replacement = replacements[i].Second; + } + if (replacements.size() > 1) + std::sort(m_Replacements.begin(), m_Replacements.end()); +} + +/** Converts a UTF-16 string to UTF-8 and does XML normalization + + @param pTarget + Pointer to a piece of memory, to where the output should be written. The caller + must call calcXMLByteLength on the same string, to ensure, + that there is enough memory for converting. + */ +bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, + bool bDoNormalization, bool bNormalizeWhitespace, + sal_Int8* pTarget, sal_uInt32& rPos) +{ + bool bRet(true); + sal_uInt32 nSurrogate = 0; + + for (sal_Int32 i = 0; i < nStrLen; i++) + { + sal_Unicode c = pStr[i]; + if (IsInvalidChar(c)) + bRet = false; + else if ((c >= 0x0001) && (c <= 0x007F)) // Deal with ascii + { + if (bDoNormalization) + { + switch (c) + { + case '&': // resemble to & + { + if ((rPos + 5) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("&"), 5); + else + { + memcpy(&(pTarget[rPos]), "&", 5); + rPos += 5; + } + } + break; + case '<': + { + if ((rPos + 4) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("<"), 4); + else + { + memcpy(&(pTarget[rPos]), "<", 4); + rPos += 4; // < + } + } + break; + case '>': + { + if ((rPos + 4) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(">"), 4); + else + { + memcpy(&(pTarget[rPos]), ">", 4); + rPos += 4; // > + } + } + break; + case '\'': + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("'"), 
6); + else + { + memcpy(&(pTarget[rPos]), "'", 6); + rPos += 6; // ' + } + } + break; + case '"': + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("""), 6); + else + { + memcpy(&(pTarget[rPos]), """, 6); + rPos += 6; // " + } + } + break; + case 13: + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("
"), 6); + else + { + memcpy(&(pTarget[rPos]), "
", 6); + rPos += 6; + } + } + break; + case LINEFEED: + { + if (bNormalizeWhitespace) + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("
"), + 6); + else + { + memcpy(&(pTarget[rPos]), "
", 6); + rPos += 6; + } + } + else + { + pTarget[rPos] = LINEFEED; + nLastLineFeedPos = rPos; + rPos++; + } + } + break; + case 9: + { + if (bNormalizeWhitespace) + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("	"), + 6); + else + { + memcpy(&(pTarget[rPos]), "	", 6); + rPos += 6; + } + } + else + { + pTarget[rPos] = 9; + rPos++; + } + } + break; + default: + { + pTarget[rPos] = static_cast<sal_Int8>(c); + rPos++; + } + break; + } + } + else + { + pTarget[rPos] = static_cast<sal_Int8>(c); + if (static_cast<sal_Int8>(c) == LINEFEED) + nLastLineFeedPos = rPos; + rPos++; + } + } + else + { + // Deal with replacements + if (bDoNormalization && !m_Replacements.empty()) + { + // search + const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i); + + // replace + if (it != nullptr) + { + OString name = ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8); + if (rPos + name.getLength() > SEQUENCESIZE) + AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(name.getStr()), + name.getLength()); + else + { + memcpy(&(pTarget[rPos]), name.getStr(), name.getLength()); + rPos += name.getLength(); + } + i += it->replacement.getLength() - 1; + continue; + } + } + + // Deal with other unicode cases + if (rtl::isHighSurrogate(c)) + { + // 1. surrogate: save (until 2. surrogate) + if (nSurrogate != 0) // left-over lone 1st Unicode surrogate + { + OSL_FAIL("left-over Unicode surrogate"); + bRet = false; + } + nSurrogate = c; + } + else if (rtl::isLowSurrogate(c)) + { + // 2. 
surrogate: write as UTF-8 + if (nSurrogate) // can only be 1st surrogate + { + nSurrogate = rtl::combineSurrogates(nSurrogate, c); + sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)), + sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)), + sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)), + sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) }; + if ((rPos + 4) > SEQUENCESIZE) + AddBytes(pTarget, rPos, aBytes, 4); + else + { + pTarget[rPos] = aBytes[0]; + rPos++; + pTarget[rPos] = aBytes[1]; + rPos++; + pTarget[rPos] = aBytes[2]; + rPos++; + pTarget[rPos] = aBytes[3]; + rPos++; + } + } + else // lone 2nd surrogate + { + OSL_FAIL("illegal Unicode character"); + bRet = false; + } + + // reset surrogate + nSurrogate = 0; + } + else if (c > 0x07FF) + { + sal_Int8 aBytes[] + = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)), sal_Int8(0x80 | ((c >> 6) & 0x3F)), + sal_Int8(0x80 | ((c >> 0) & 0x3F)) }; + if ((rPos + 3) > SEQUENCESIZE) + AddBytes(pTarget, rPos, aBytes, 3); + else + { + pTarget[rPos] = aBytes[0]; + rPos++; + pTarget[rPos] = aBytes[1]; + rPos++; + pTarget[rPos] = aBytes[2]; + rPos++; + } + } + else + { + sal_Int8 aBytes[] + = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)), sal_Int8(0x80 | ((c >> 0) & 0x3F)) }; + if ((rPos + 2) > SEQUENCESIZE) + AddBytes(pTarget, rPos, aBytes, 2); + else + { + pTarget[rPos] = aBytes[0]; + rPos++; + pTarget[rPos] = aBytes[1]; + rPos++; + } + } + } + + OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position"); + if (rPos == SEQUENCESIZE) + rPos = writeSequence(); + + // reset left-over surrogate + if ((nSurrogate != 0) && !rtl::isHighSurrogate(c)) + { + OSL_FAIL("left-over Unicode surrogate"); + nSurrogate = 0; + bRet = false; + } + } + if (nSurrogate != 0) // trailing lone 1st surrogate + { + OSL_FAIL("left-over Unicode surrogate"); + bRet = false; + } + return bRet; +} + +void SaxWriterHelper::FinishStartElement() +{ + if (!m_bStartElementFinished) + { + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + 
nCurrentPos = writeSequence(); + m_bStartElementFinished = true; + } +} + +void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel) +{ + FinishStartElement(); + if (m_nLevel > 0) + { + if ((nCurrentPos + m_nLevel + 1) <= SEQUENCESIZE) + { + mp_Sequence[nCurrentPos] = LINEFEED; + nLastLineFeedPos = nCurrentPos; + nCurrentPos++; + memset(&(mp_Sequence[nCurrentPos]), 32, m_nLevel); + nCurrentPos += m_nLevel; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } + else + { + sal_uInt32 nCount(m_nLevel + 1); + std::unique_ptr<sal_Int8[]> pBytes(new sal_Int8[nCount]); + pBytes[0] = LINEFEED; + memset(&(pBytes[1]), 32, m_nLevel); + AddBytes(mp_Sequence, nCurrentPos, pBytes.get(), nCount); + pBytes.reset(); + nLastLineFeedPos = nCurrentPos - nCount; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } + } + else + { + mp_Sequence[nCurrentPos] = LINEFEED; + nLastLineFeedPos = nCurrentPos; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } +} + +bool SaxWriterHelper::writeString(const OUString& rWriteOutString, bool bDoNormalization, + bool bNormalizeWhitespace) +{ + FinishStartElement(); + return convertToXML(rWriteOutString.getStr(), rWriteOutString.getLength(), bDoNormalization, + bNormalizeWhitespace, mp_Sequence, nCurrentPos); +} + +void SaxWriterHelper::startDocument() +{ + const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; + const int nLen = strlen(pc); + if ((nCurrentPos + nLen) <= SEQUENCESIZE) + { + memcpy(mp_Sequence, pc, nLen); + nCurrentPos += nLen; + } + else + { + AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>(pc), nLen); + } + OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position"); + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = LINEFEED; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); +} + +#ifndef NDEBUG +bool inrange(sal_Unicode c, sal_Unicode 
start, sal_Unicode end) { return c >= start && c <= end; } +#endif + +void CheckValidName(OUString const& rName) +{ +#ifdef NDEBUG + (void)rName; +#else + assert(!rName.isEmpty()); + bool hasColon(false); + for (sal_Int32 i = 0; i < rName.getLength(); ++i) + { + auto const c(rName[i]); + if (c == ':') + { + // see https://www.w3.org/TR/REC-xml-names/#ns-qualnames + SAL_WARN_IF(hasColon, "sax", "only one colon allowed: " << rName); + assert(!hasColon && "only one colon allowed"); + hasColon = true; + } + else if (!rtl::isAsciiAlphanumeric(c) && c != '_' && c != '-' && c != '.' + && !inrange(c, 0x00C0, 0x00D6) && !inrange(c, 0x00D8, 0x00F6) + && !inrange(c, 0x00F8, 0x02FF) && !inrange(c, 0x0370, 0x037D) + && !inrange(c, 0x037F, 0x1FFF) && !inrange(c, 0x200C, 0x200D) + && !inrange(c, 0x2070, 0x218F) && !inrange(c, 0x2C00, 0x2FEF) + && !inrange(c, 0x3001, 0xD7FF) && !inrange(c, 0xF900, 0xFDCF) + && !inrange(c, 0xFDF0, 0xFFFD) && c != 0x00B7 && !inrange(c, 0x0300, 0x036F) + && !inrange(c, 0x203F, 0x2040)) + { + // https://www.w3.org/TR/xml11/#NT-NameChar + // (currently we don't warn about invalid start chars) + SAL_WARN("sax", "unexpected character in attribute name: " << rName); + assert(!"unexpected character in attribute name"); + } + } +#endif +} + +SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName, + const Reference<XAttributeList>& xAttribs) +{ + FinishStartElement(); + +#ifdef DBG_UTIL + m_DebugStartedElements.push(rName); + ::std::set<OUString> DebugAttributes; +#endif + + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + SaxInvalidCharacterError eRet(SAX_NONE); + CheckValidName(rName); + if (!writeString(rName, false, false)) + eRet = SAX_ERROR; + + sal_Int16 nAttribCount = xAttribs.is() ? 
xAttribs->getLength() : 0; + for (sal_Int16 i = 0; i < nAttribCount; i++) + { + mp_Sequence[nCurrentPos] = ' '; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + OUString const& rAttrName(xAttribs->getNameByIndex(i)); +#ifdef DBG_UTIL + // Well-formedness constraint: Unique Att Spec + assert(DebugAttributes.find(rAttrName) == DebugAttributes.end()); + DebugAttributes.insert(rAttrName); +#endif + CheckValidName(rAttrName); + if (!writeString(rAttrName, false, false)) + eRet = SAX_ERROR; + + mp_Sequence[nCurrentPos] = '='; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '"'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + if (!writeString(xAttribs->getValueByIndex(i), true, true) && eRet != SAX_ERROR) + eRet = SAX_WARNING; + + mp_Sequence[nCurrentPos] = '"'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } + + m_bStartElementFinished = false; // because the '>' character is not added, + // because it is possible, that the "/>" + // characters have to add + return eRet; +} + +bool SaxWriterHelper::FinishEmptyElement() +{ + if (m_bStartElementFinished) + return false; + + mp_Sequence[nCurrentPos] = '/'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + m_bStartElementFinished = true; + + return true; +} + +bool SaxWriterHelper::endElement(const OUString& rName) +{ + FinishStartElement(); + + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '/'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + CheckValidName(rName); + bool bRet(writeString(rName, false, false)); + + mp_Sequence[nCurrentPos] = '>'; 
+ nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + return bRet; +} + +void SaxWriterHelper::endDocument() +{ + if (nCurrentPos > 0) + { + m_Sequence.realloc(nCurrentPos); + nCurrentPos = writeSequence(); + //m_Sequence.realloc(SEQUENCESIZE); + } +} + +void SaxWriterHelper::clearBuffer() +{ + FinishStartElement(); + if (nCurrentPos > 0) + { + m_Sequence.realloc(nCurrentPos); + nCurrentPos = writeSequence(); + m_Sequence.realloc(SEQUENCESIZE); + // Be sure to update the array pointer after the reallocation. + mp_Sequence = m_Sequence.getArray(); + } +} + +bool SaxWriterHelper::processingInstruction(const OUString& rTarget, const OUString& rData) +{ + FinishStartElement(); + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '?'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + bool bRet(writeString(rTarget, false, false)); + + mp_Sequence[nCurrentPos] = ' '; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + if (!writeString(rData, false, false)) + bRet = false; + + mp_Sequence[nCurrentPos] = '?'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + return bRet; +} + +void SaxWriterHelper::startCDATA() +{ + FinishStartElement(); + if ((nCurrentPos + 9) <= SEQUENCESIZE) + { + memcpy(&(mp_Sequence[nCurrentPos]), "<![CDATA[", 9); + nCurrentPos += 9; + } + else + AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("<![CDATA["), 9); + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); +} + +void SaxWriterHelper::endCDATA() +{ + FinishStartElement(); + if ((nCurrentPos + 3) <= SEQUENCESIZE) + { + memcpy(&(mp_Sequence[nCurrentPos]), "]]>", 3); + nCurrentPos += 3; + } + else + 
/**
 * Computes how many bytes a string is counted as for output purposes,
 * without writing anything.
 *
 * @param rStr                 the text to measure
 * @param bDoNormalization     when true, special characters are counted with
 *                             the length of their escaped form and the custom
 *                             replacements in m_Replacements are considered
 * @param bNormalizeWhitespace when true (and bDoNormalization is true), LF and
 *                             TAB are counted as escaped as well
 * @return the number of bytes counted for rStr
 */
sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
                                             bool bNormalizeWhitespace)
{
    sal_Int32 nOutputLength = 0;
    // pending high (1st) surrogate, 0 when none is pending
    sal_uInt32 nSurrogate = 0;

    const sal_Unicode* pStr = rStr.getStr();
    sal_Int32 nStrLen = rStr.getLength();
    for (sal_Int32 i = 0; i < nStrLen; i++)
    {
        sal_uInt16 c = pStr[i];
        // valid characters in the range 0x0001..0x007F: one byte unless escaped
        if (!IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F))
        {
            if (bDoNormalization)
            {
                switch (c)
                {
                    case '&': // counted as 5 bytes, the length of "&amp;"
                        nOutputLength += 5;
                        break;
                    case '<': // counted as 4 bytes, the length of "&lt;"
                    case '>': // counted as 4 bytes, the length of "&gt;"
                        nOutputLength += 4;
                        break;
                    case '\'': // counted as 6 bytes, the length of "&apos;"
                    case '"': // counted as 6 bytes, the length of "&quot;"
                    case 13: // CR, counted as 6 bytes, the length of "&#x0d;"
                        nOutputLength += 6;
                        break;

                    case 10: // LF, 6 bytes ("&#x0a;") only when normalizing whitespace
                    case 9: // TAB, 6 bytes ("&#x09;") only when normalizing whitespace
                        if (bNormalizeWhitespace)
                        {
                            nOutputLength += 6;
                        }
                        else
                        {
                            nOutputLength++;
                        }
                        break;
                    default:
                        nOutputLength++;
                }
            }
            else
            {
                nOutputLength++;
            }
        }
        else
        {
            // Everything else: 0x0000, characters above 0x007F, and characters
            // rejected by IsInvalidChar().

            // Deal with replacements
            if (bDoNormalization && !m_Replacements.empty())
            {
                // search
                const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i);

                if (it != nullptr)
                {
                    // count the UTF-8 length of the replacement's name ...
                    nOutputLength
                        += ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8).getLength();
                    // ... and skip the matched source text (the loop's i++ adds the last step)
                    i += it->replacement.getLength() - 1;
                    continue;
                }
            }

            // Deal with other unicode cases
            if (rtl::isHighSurrogate(c))
            {
                // save surrogate
                nSurrogate = c;
            }
            else if (rtl::isLowSurrogate(c))
            {
                // 2nd surrogate: a complete pair counts as 4 bytes;
                // a low surrogate without a pending high surrogate counts as nothing
                if (nSurrogate)
                    nOutputLength += 4;
                nSurrogate = 0;
            }
            else if (c > 0x07FF)
            {
                // three-byte UTF-8 sequence
                nOutputLength += 3;
            }
            else
            {
                // two-byte UTF-8 sequence
                nOutputLength += 2;
            }
        }

        // surrogate processing: a pending high surrogate survives only the
        // iteration that stored it; any other character drops it
        if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
            nSurrogate = 0;
    }

    return nOutputLength;
}
m_pSaxWriterHelper && m_bDocStarted) + m_pSaxWriterHelper->clearBuffer(); + else + { + m_out = aStream; + m_pSaxWriterHelper.reset(new SaxWriterHelper(m_out)); + m_bDocStarted = false; + m_nLevel = 0; + m_bIsCDATA = false; + } + } + catch (const SAXException& e) + { + throw css::lang::WrappedTargetRuntimeException(e.Message, getXWeak(), + e.WrappedException); + } + } + virtual Reference<XOutputStream> SAL_CALL getOutputStream() override { return m_out; } + +public: // XDocumentHandler + virtual void SAL_CALL startDocument() override; + + virtual void SAL_CALL endDocument() override; + + virtual void SAL_CALL startElement(const OUString& aName, + const Reference<XAttributeList>& xAttribs) override; + + virtual void SAL_CALL endElement(const OUString& aName) override; + + virtual void SAL_CALL characters(const OUString& aChars) override; + + virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) override; + virtual void SAL_CALL processingInstruction(const OUString& aTarget, + const OUString& aData) override; + virtual void SAL_CALL setDocumentLocator(const Reference<XLocator>& xLocator) override; + virtual void SAL_CALL setCustomEntityNames( + const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& + replacements) override; + +public: // XExtendedDocumentHandler + virtual void SAL_CALL startCDATA() override; + virtual void SAL_CALL endCDATA() override; + virtual void SAL_CALL comment(const OUString& sComment) override; + virtual void SAL_CALL unknown(const OUString& sString) override; + virtual void SAL_CALL allowLineBreak() override; + +public: // XServiceInfo + OUString SAL_CALL getImplementationName() override; + Sequence<OUString> SAL_CALL getSupportedServiceNames() override; + sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; + +private: + sal_Int32 getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept; + + Reference<XOutputStream> m_out; + std::unique_ptr<SaxWriterHelper> 
m_pSaxWriterHelper; + + // Status information + bool m_bDocStarted : 1; + bool m_bIsCDATA : 1; + bool m_bForceLineBreak : 1; + bool m_bAllowLineBreak : 1; + sal_Int32 m_nLevel; +}; + +sal_Int32 SAXWriter::getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept +{ + sal_Int32 nLength = -1; + if (m_pSaxWriterHelper) + { + if (m_bForceLineBreak + || (m_bAllowLineBreak + && ((nFirstLineBreakOccurrence + m_pSaxWriterHelper->GetLastColumnCount()) + > MAXCOLUMNCOUNT))) + nLength = m_nLevel; + } + m_bForceLineBreak = false; + m_bAllowLineBreak = false; + return nLength; +} + +bool isFirstCharWhitespace(const sal_Unicode* p) noexcept { return *p == ' '; } + +// XServiceInfo +OUString SAXWriter::getImplementationName() { return "com.sun.star.extensions.xml.sax.Writer"; } + +// XServiceInfo +sal_Bool SAXWriter::supportsService(const OUString& ServiceName) +{ + return cppu::supportsService(this, ServiceName); +} + +// XServiceInfo +Sequence<OUString> SAXWriter::getSupportedServiceNames() +{ + return { "com.sun.star.xml.sax.Writer" }; +} + +void SAXWriter::startDocument() +{ + if (m_bDocStarted || !m_out.is() || !m_pSaxWriterHelper) + { + throw SAXException(); + } + m_bDocStarted = true; + m_pSaxWriterHelper->startDocument(); +} + +void SAXWriter::endDocument() +{ + if (!m_bDocStarted) + { + throw SAXException("endDocument called before startDocument", Reference<XInterface>(), + Any()); + } + if (m_nLevel) + { + throw SAXException("unexpected end of document", Reference<XInterface>(), Any()); + } + m_pSaxWriterHelper->endDocument(); + try + { + m_out->closeOutput(); + } + catch (const IOException&) + { + css::uno::Any anyEx = cppu::getCaughtException(); + throw SAXException("IO exception during closing the IO Stream", Reference<XInterface>(), + anyEx); + } +} + +void SAXWriter::startElement(const OUString& aName, const Reference<XAttributeList>& xAttribs) +{ + if (!m_bDocStarted) + { + throw SAXException("startElement called before startDocument", {}, {}); + } + if 
(m_bIsCDATA) + { + throw SAXException("startElement call not allowed with CDATA sections", {}, {}); + } + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + { + sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0; + + nLength++; // "<" + nLength += m_pSaxWriterHelper->calcXMLByteLength(aName, false, false); // the tag name + + sal_Int16 n; + for (n = 0; n < static_cast<sal_Int16>(nAttribCount); n++) + { + nLength++; // " " + OUString tmp = xAttribs->getNameByIndex(n); + + nLength += m_pSaxWriterHelper->calcXMLByteLength(tmp, false, false); + + nLength += 2; // =" + + tmp = xAttribs->getValueByIndex(n); + + nLength += m_pSaxWriterHelper->calcXMLByteLength(tmp, true, true); + + nLength += 1; // " + } + + nLength++; // '>' + } + + // Is there a new indentation necessary ? + sal_Int32 nPrefix(getIndentPrefixLength(nLength)); + + // write into sequence + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + SaxInvalidCharacterError eRet(m_pSaxWriterHelper->startElement(aName, xAttribs)); + + m_nLevel++; + + if (eRet == SAX_WARNING) + { + throw SAXInvalidCharacterException( + "Invalid character during XML-Export in an attribute value", {}, {}); + } + else if (eRet == SAX_ERROR) + { + throw SAXException("Invalid character during XML-Export", {}, {}); + } +} + +void SAXWriter::endElement(const OUString& aName) +{ + if (!m_bDocStarted) + { + throw SAXException(); + } + m_nLevel--; + + if (m_nLevel < 0) + { + throw SAXException(); + } + bool bRet(true); + + // check here because Helper's endElement is not always called +#ifdef DBG_UTIL + assert(!m_pSaxWriterHelper->m_DebugStartedElements.empty()); + // Well-formedness constraint: Element Type Match + assert(aName == m_pSaxWriterHelper->m_DebugStartedElements.top()); + m_pSaxWriterHelper->m_DebugStartedElements.pop(); +#endif + + if (m_pSaxWriterHelper->FinishEmptyElement()) + m_bForceLineBreak = false; + else + { + // only ascii chars allowed + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + 
nLength = 3 + m_pSaxWriterHelper->calcXMLByteLength(aName, false, false); + sal_Int32 nPrefix = getIndentPrefixLength(nLength); + + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + bRet = m_pSaxWriterHelper->endElement(aName); + } + + if (!bRet) + { + throw SAXException("Invalid character during XML-Export", {}, {}); + } +} + +void SAXWriter::characters(const OUString& aChars) +{ + if (!m_bDocStarted) + { + throw SAXException("characters method called before startDocument", {}, {}); + } + + bool bThrowException(false); + if (!aChars.isEmpty()) + { + if (m_bIsCDATA) + bThrowException = !m_pSaxWriterHelper->writeString(aChars, false, false); + else + { + // Note : nFirstLineBreakOccurrence is not exact, because we don't know, how + // many 2 and 3 byte chars are inbetween. However this whole stuff + // is eitherway for pretty printing only, so it does not need to be exact. + sal_Int32 nLength(0); + sal_Int32 nIndentPrefix(-1); + if (m_bAllowLineBreak) + { + // returns position of first ascii 10 within the string, -1 when no 10 in string. + sal_Int32 nFirstLineBreakOccurrence = aChars.indexOf(LINEFEED); + + nLength = m_pSaxWriterHelper->calcXMLByteLength(aChars, !m_bIsCDATA, false); + nIndentPrefix = getIndentPrefixLength( + nFirstLineBreakOccurrence >= 0 ? 
nFirstLineBreakOccurrence : nLength); + } + else + nIndentPrefix = getIndentPrefixLength(nLength); + + // insert indentation + if (nIndentPrefix >= 0) + { + if (isFirstCharWhitespace(aChars.getStr())) + m_pSaxWriterHelper->insertIndentation(nIndentPrefix - 1); + else + m_pSaxWriterHelper->insertIndentation(nIndentPrefix); + } + bThrowException = !m_pSaxWriterHelper->writeString(aChars, true, false); + } + } + if (bThrowException) + { + throw SAXInvalidCharacterException("Invalid character during XML-Export", {}, {}); + } +} + +void SAXWriter::ignorableWhitespace(const OUString&) +{ + if (!m_bDocStarted) + { + throw SAXException(); + } + + m_bForceLineBreak = true; +} + +void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData) +{ + if (!m_bDocStarted || m_bIsCDATA) + { + throw SAXException(); + } + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + { + nLength = 2; // "<?" + nLength += m_pSaxWriterHelper->calcXMLByteLength(aTarget, false, false); + + nLength += 1; // " " + + nLength += m_pSaxWriterHelper->calcXMLByteLength(aData, false, false); + + nLength += 2; // "?>" + } + + sal_Int32 nPrefix = getIndentPrefixLength(nLength); + + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + if (!m_pSaxWriterHelper->processingInstruction(aTarget, aData)) + { + throw SAXException("Invalid character during XML-Export", {}, {}); + } +} + +void SAXWriter::setDocumentLocator(const Reference<XLocator>&) {} + +void SAXWriter::setCustomEntityNames( + const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements) +{ + m_pSaxWriterHelper->setCustomEntityNames(replacements); +} + +void SAXWriter::startCDATA() +{ + if (!m_bDocStarted || m_bIsCDATA) + { + throw SAXException(); + } + + sal_Int32 nPrefix = getIndentPrefixLength(9); + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + m_pSaxWriterHelper->startCDATA(); + + m_bIsCDATA = true; +} + +void SAXWriter::endCDATA() +{ + if 
(!m_bDocStarted || !m_bIsCDATA) + { + throw SAXException("endCDATA was called without startCDATA", {}, {}); + } + + sal_Int32 nPrefix = getIndentPrefixLength(3); + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + m_pSaxWriterHelper->endCDATA(); + + m_bIsCDATA = false; +} + +void SAXWriter::comment(const OUString& sComment) +{ + if (!m_bDocStarted || m_bIsCDATA) + { + throw SAXException(); + } + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + { + nLength = 4; // "<!--" + nLength += m_pSaxWriterHelper->calcXMLByteLength(sComment, false, false); + + nLength += 3; + } + + sal_Int32 nPrefix = getIndentPrefixLength(nLength); + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + if (!m_pSaxWriterHelper->comment(sComment)) + { + throw SAXException("Invalid character during XML-Export", {}, {}); + } +} + +void SAXWriter::allowLineBreak() +{ + if (!m_bDocStarted || m_bAllowLineBreak) + { + throw SAXException(); + } + + m_bAllowLineBreak = true; +} + +void SAXWriter::unknown(const OUString& sString) +{ + if (!m_bDocStarted) + { + throw SAXException(); + } + if (m_bIsCDATA) + { + throw SAXException(); + } + + if (sString.startsWith("<?xml")) + return; + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + nLength = m_pSaxWriterHelper->calcXMLByteLength(sString, false, false); + + sal_Int32 nPrefix = getIndentPrefixLength(nLength); + if (nPrefix >= 0) + m_pSaxWriterHelper->insertIndentation(nPrefix); + + if (!m_pSaxWriterHelper->writeString(sString, false, false)) + { + throw SAXException("Invalid character during XML-Export", {}, {}); + } +} + +} // namespace + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* +com_sun_star_extensions_xml_sax_Writer_get_implementation(css::uno::XComponentContext*, + css::uno::Sequence<css::uno::Any> const&) +{ + return cppu::acquire(new SAXWriter); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/expatwrap/xml2utf.cxx b/sax/source/expatwrap/xml2utf.cxx new file 
mode 100644 index 0000000000..5b3e4b9e1c --- /dev/null +++ b/sax/source/expatwrap/xml2utf.cxx @@ -0,0 +1,519 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ +#include <string.h> + +#include <algorithm> + +#include <sal/types.h> + +#include <rtl/textenc.h> +#include <rtl/tencinfo.h> +#include <com/sun/star/io/NotConnectedException.hpp> +#include <com/sun/star/io/XInputStream.hpp> +#include <xml2utf.hxx> +#include <memory> + + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::io; + + +namespace sax_expatwrap { + +sal_Int32 XMLFile2UTFConverter::readAndConvert( Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ) +{ + if( ! m_in.is() ) { + throw NotConnectedException(); + } + if( ! m_bStarted ) { + // it should be possible to find the encoding attribute + // within the first 512 bytes == 128 chars in UCS-4 + nMaxToRead = ::std::max( sal_Int32(512) , nMaxToRead ); + } + + sal_Int32 nRead; + Sequence< sal_Int8 > seqStart; + while( true ) + { + nRead = m_in->readSomeBytes( seq , nMaxToRead ); + + if( nRead + seqStart.getLength()) + { + // if nRead is 0, the file is already eof. + if( ! 
m_bStarted && nRead ) + { + // ensure that enough data is available to parse encoding + if( seqStart.hasElements() ) + { + // prefix with what we had so far. + sal_Int32 nLength = seq.getLength(); + seq.realloc( seqStart.getLength() + nLength ); + + memmove (seq.getArray() + seqStart.getLength(), + seq.getConstArray(), + nLength); + memcpy (seq.getArray(), + seqStart.getConstArray(), + seqStart.getLength()); + } + + // autodetection with the first bytes + if( ! isEncodingRecognizable( seq ) ) + { + // remember what we have so far. + seqStart = seq; + + // read more ! + continue; + } + if( scanForEncoding( seq ) || !m_sEncoding.isEmpty() ) { + // initialize decoding + initializeDecoding(); + } + seqStart = Sequence < sal_Int8 > (); + } + + // do the encoding + if( m_pText2Unicode && m_pUnicode2Text && + m_pText2Unicode->canContinue() ) { + + Sequence<sal_Unicode> seqUnicode = m_pText2Unicode->convert( seq ); + seq = m_pUnicode2Text->convert( seqUnicode.getConstArray(), seqUnicode.getLength() ); + } + + if( ! m_bStarted ) + { + // it must now be ensured, that no encoding attribute exist anymore + // ( otherwise the expat-Parser will crash ) + // This must be done after decoding ! + // ( e.g. 
/**
 * Cuts the encoding pseudo-attribute out of a leading "<?xml ...?>"
 * declaration, in place.
 *
 * Only the part of the buffer up to the first line feed is searched. The
 * bytes from " encoding" up to and including the closing quote of its value
 * are removed and the sequence is shortened accordingly. The buffer is left
 * untouched when it does not start with "<?xml", has no " encoding" on the
 * first line, or no non-empty quoted value is found.
 *
 * @param seq the buffer to modify
 */
void XMLFile2UTFConverter::removeEncoding( Sequence<sal_Int8> &seq )
{
    const sal_Int8 *pSource = seq.getArray();
    // strncmp() returns non-zero when the buffer does not start with "<?xml"
    if (seq.getLength() < 5 || strncmp(reinterpret_cast<const char *>(pSource), "<?xml", 5))
        return;

    // scan for encoding
    OString str( reinterpret_cast<char const *>(pSource), seq.getLength() );

    // cut sequence to first line break
    // find first line break;
    int nMax = str.indexOf( 10 );
    if( nMax >= 0 )
    {
        str = str.copy( 0 , nMax );
    }

    int nFound = str.indexOf( " encoding" );
    if( nFound < 0 ) return;

    // Locate the quoted value: nStart is the opening quote, nStop the closing one.
    // NOTE(review): when the line contains a double quote but no single quote
    // after nFound, indexOf( "'" ) presumably yields -1, which is < nStart, so
    // the single-quote branch is taken and nothing is removed below. Confirm
    // whether this is intended before changing it; scanForEncoding() uses the
    // same selection logic and the two need to stay consistent.
    int nStop;
    int nStart = str.indexOf( "\"" , nFound );
    if( nStart < 0 || str.indexOf( "'" , nFound ) < nStart )
    {
        nStart = str.indexOf( "'" , nFound );
        nStop = str.indexOf( "'" , nStart +1 );
    }
    else
    {
        nStop = str.indexOf( "\"" , nStart +1);
    }

    // require both quotes and at least one character between them
    if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop )
    {
        // remove encoding tag from file:
        // move everything after the closing quote down onto " encoding" ...
        memmove( &( seq.getArray()[nFound] ) ,
                 &( seq.getArray()[nStop+1]) ,
                 seq.getLength() - nStop -1);
        // ... and drop the now unused tail
        seq.realloc( seq.getLength() - ( nStop+1 - nFound ) );
    }
}
== pSource[6] ) ) + { + // check for utf-16 + bCheckIfFirstClosingBracketExists = true; + } + else if( ( '<' == pSource[1] || '<' == pSource[3] ) && + ( '?' == pSource[5] || '?' == pSource[7] ) ) + { + // check for + bCheckIfFirstClosingBracketExists = true; + } + + if( bCheckIfFirstClosingBracketExists ) + { + // whole <?xml tag is valid + return std::find(seq.begin(), seq.end(), '>') != seq.end(); + } + + // No <? tag in front, no need for a bigger buffer + return true; +} + +bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq ) +{ + const sal_uInt8 *pSource = reinterpret_cast<const sal_uInt8*>( seq.getConstArray() ); + bool bReturn = true; + + if( seq.getLength() < 4 ) { + // no recognition possible, when less than 4 bytes are available + return false; + } + + // first level : detect possible file formats + if (seq.getLength() >= 5 && !strncmp(reinterpret_cast<const char *>(pSource), "<?xml", 5)) { + // scan for encoding + OString str( reinterpret_cast<const char *>(pSource), seq.getLength() ); + + // cut sequence to first line break + //find first line break; + int nMax = str.indexOf( 10 ); + if( nMax >= 0 ) + { + str = str.copy( 0 , nMax ); + } + + int nFound = str.indexOf( " encoding" ); + if( nFound >= 0 ) { + int nStop; + int nStart = str.indexOf( "\"" , nFound ); + if( nStart < 0 || str.indexOf( "'" , nFound ) < nStart ) + { + nStart = str.indexOf( "'" , nFound ); + nStop = str.indexOf( "'" , nStart +1 ); + } + else + { + nStop = str.indexOf( "\"" , nStart +1); + } + if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop ) + { + // encoding found finally + m_sEncoding = str.copy( nStart+1 , nStop - nStart - 1 ); + } + } + } + else if( 0xFE == pSource[0] && + 0xFF == pSource[1] ) { + // UTF-16 big endian + // conversion is done so that encoding information can be easily extracted + m_sEncoding = "utf-16"_ostr; + } + else if( 0xFF == pSource[0] && + 0xFE == pSource[1] ) { + // UTF-16 little endian + // conversion is done so that encoding 
information can be easily extracted + m_sEncoding = "utf-16"_ostr; + } + else if( 0x00 == pSource[0] && 0x3c == pSource[1] && 0x00 == pSource[2] && 0x3f == pSource[3] ) { + // UTF-16 big endian without byte order mark (this is (strictly speaking) an error.) + // The byte order mark is simply added + + // simply add the byte order mark ! + seq.realloc( seq.getLength() + 2 ); + memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 ); + reinterpret_cast<sal_uInt8*>(seq.getArray())[0] = 0xFE; + reinterpret_cast<sal_uInt8*>(seq.getArray())[1] = 0xFF; + + m_sEncoding = "utf-16"_ostr; + } + else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x3f == pSource[2] && 0x00 == pSource[3] ) { + // UTF-16 little endian without byte order mark (this is (strictly speaking) an error.) + // The byte order mark is simply added + + seq.realloc( seq.getLength() + 2 ); + memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 ); + reinterpret_cast<sal_uInt8*>(seq.getArray())[0] = 0xFF; + reinterpret_cast<sal_uInt8*>(seq.getArray())[1] = 0xFE; + + m_sEncoding = "utf-16"_ostr; + } + else if( 0xEF == pSource[0] && + 0xBB == pSource[1] && + 0xBF == pSource[2] ) + { + // UTF-8 BOM (byte order mark); signifies utf-8, and not byte order + // The BOM is removed. 
+ memmove( seq.getArray(), &( seq.getArray()[3] ), seq.getLength()-3 ); + seq.realloc( seq.getLength() - 3 ); + m_sEncoding = "utf-8"_ostr; + } + else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) { + // UCS-4 big endian + m_sEncoding = "ucs-4"_ostr; + } + else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x00 == pSource[3] ) { + // UCS-4 little endian + m_sEncoding = "ucs-4"_ostr; + } +/* TODO: no need to test for the moment since we return sal_False like default case anyway + else if( 0x4c == pSource[0] && 0x6f == pSource[1] && + 0xa7 == static_cast<unsigned char> (pSource[2]) && + 0x94 == static_cast<unsigned char> (pSource[3]) ) { + // EBCDIC + bReturn = sal_False; // must be extended + } +*/ + else { + // other + // UTF8 is directly recognized by the parser. + bReturn = false; + } + + return bReturn; +} + +void XMLFile2UTFConverter::initializeDecoding() +{ + + if( !m_sEncoding.isEmpty() ) + { + rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( m_sEncoding.getStr() ); + if( encoding != RTL_TEXTENCODING_UTF8 ) + { + m_pText2Unicode = std::make_unique<Text2UnicodeConverter>( m_sEncoding ); + m_pUnicode2Text = std::make_unique<Unicode2TextConverter>( RTL_TEXTENCODING_UTF8 ); + } + } +} + + +// Text2UnicodeConverter + + +Text2UnicodeConverter::Text2UnicodeConverter( const OString &sEncoding ) + : m_convText2Unicode(nullptr) + , m_contextText2Unicode(nullptr) +{ + rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( sEncoding.getStr() ); + if( RTL_TEXTENCODING_DONTKNOW == encoding ) + { + m_bCanContinue = false; + m_bInitialized = false; + } + else + { + init( encoding ); + } +} + +Text2UnicodeConverter::~Text2UnicodeConverter() +{ + if( m_bInitialized ) + { + rtl_destroyTextToUnicodeContext( m_convText2Unicode , m_contextText2Unicode ); + rtl_destroyUnicodeToTextConverter( m_convText2Unicode ); + } +} + +void Text2UnicodeConverter::init( rtl_TextEncoding encoding ) +{ + 
/**
 * Converts a chunk of encoded text to UTF-16.
 *
 * Bytes left unconverted by the previous call (m_seqSource) are prepended to
 * the new input. Source bytes that remain unconverted when the converter
 * reports the source buffer as too small are stored in m_seqSource for the
 * next call.
 *
 * @param seqText the next chunk of encoded bytes
 * @return the converted characters, sized to exactly what was produced
 */
Sequence<sal_Unicode> Text2UnicodeConverter::convert( const Sequence<sal_Int8> &seqText )
{
    sal_uInt32 uiInfo;
    sal_Size nSrcCvtBytes = 0;
    sal_Size nTargetCount = 0; // characters produced so far
    sal_Size nSourceCount = 0; // source bytes consumed so far

    // the whole source size
    sal_Int32 nSourceSize = seqText.getLength() + m_seqSource.getLength();
    // initial output capacity: one character per source byte; grown on demand below
    Sequence<sal_Unicode> seqUnicode ( nSourceSize );

    const sal_Int8 *pbSource = seqText.getConstArray();
    std::unique_ptr<sal_Int8[]> pbTempMem;

    if( m_seqSource.hasElements() ) {
        // put old rest and new byte sequence into one array
        pbTempMem.reset(new sal_Int8[ nSourceSize ]);
        memcpy( pbTempMem.get() , m_seqSource.getConstArray() , m_seqSource.getLength() );
        memcpy( &(pbTempMem[ m_seqSource.getLength() ]) , seqText.getConstArray() , seqText.getLength() );
        pbSource = pbTempMem.get();

        // set to zero again
        m_seqSource = Sequence< sal_Int8 >();
    }

    while( true ) {

        /* All invalid characters are transformed to the unicode undefined char */
        nTargetCount += rtl_convertTextToUnicode(
            m_convText2Unicode,
            m_contextText2Unicode,
            reinterpret_cast<const char *>(&( pbSource[nSourceCount] )),
            nSourceSize - nSourceCount ,
            &( seqUnicode.getArray()[ nTargetCount ] ),
            seqUnicode.getLength() - nTargetCount,
            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
            RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
            RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
            &uiInfo,
            &nSrcCvtBytes );
        nSourceCount += nSrcCvtBytes;

        if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL ) {
            // output buffer exhausted: double it and resume after the
            // bytes and characters already accounted for
            seqUnicode.realloc( seqUnicode.getLength() * 2 );
            continue;
        }
        break;
    }
    if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL ) {
        // save the unconverted source bytes for the next conversion
        m_seqSource.realloc( nSourceSize - nSourceCount );
        memcpy( m_seqSource.getArray() , &(pbSource[nSourceCount]) , nSourceSize-nSourceCount );
    }

    // set to correct unicode size
    seqUnicode.realloc( nTargetCount );

    return seqUnicode;
}
+ sal_Int32 nSeqSize = nSourceSize * 3; + + Sequence<sal_Int8> seqText( nSeqSize ); + char *pTarget = reinterpret_cast<char *>(seqText.getArray()); + while( true ) { + + nTargetCount += rtl_convertUnicodeToText( + m_convUnicode2Text, + m_contextUnicode2Text, + &( puSource[nSourceCount] ), + nSourceSize - nSourceCount , + &( pTarget[nTargetCount] ), + nSeqSize - nTargetCount, + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT | + RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT , + &uiInfo, + &nSrcCvtChars); + nSourceCount += nSrcCvtChars; + + if( uiInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL ) { + nSeqSize = nSeqSize *2; + seqText.realloc( nSeqSize ); // double array size + pTarget = reinterpret_cast<char *>(seqText.getArray()); + continue; + } + break; + } + + // for surrogates + if( uiInfo & RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL ) { + m_seqSource.realloc( nSourceSize - nSourceCount ); + memcpy( m_seqSource.getArray() , + &(puSource[nSourceCount]), + (nSourceSize - nSourceCount) * sizeof( sal_Unicode ) ); + } + + // reduce the size of the buffer (fast, no copy necessary) + seqText.realloc( nTargetCount ); + + return seqText; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx new file mode 100644 index 0000000000..e0338e053c --- /dev/null +++ b/sax/source/fastparser/fastparser.cxx @@ -0,0 +1,1680 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sax/fastparser.hxx> +#include <sax/fastattribs.hxx> +#include <utility> +#include <xml2utf.hxx> + +#include <com/sun/star/io/XSeekable.hpp> +#include <com/sun/star/lang/DisposedException.hpp> +#include <com/sun/star/lang/IllegalArgumentException.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> +#include <com/sun/star/xml/sax/FastToken.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/xml/sax/XFastContextHandler.hpp> +#include <cppuhelper/implbase.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <cppuhelper/exc_hlp.hxx> +#include <osl/conditn.hxx> +#include <rtl/ref.hxx> +#include <sal/log.hxx> +#include <salhelper/thread.hxx> +#include <comphelper/diagnose_ex.hxx> +#include <o3tl/string_view.hxx> + +#include <queue> +#include <memory> +#include <mutex> +#include <optional> +#include <stack> +#include <string_view> +#include <unordered_map> +#include <vector> +#include <cassert> +#include <cstring> +#include <libxml/parser.h> + +// Inverse of libxml's BAD_CAST. 
+#define XML_CAST( str ) reinterpret_cast< const char* >( str ) + +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star::io; +using namespace com::sun::star; +using namespace sax_fastparser; + +static void NormalizeURI( OUString& rName ); + +namespace { + +struct Event; +class FastLocatorImpl; +struct NamespaceDefine; +struct Entity; + +typedef std::unordered_map< OUString, sal_Int32 > NamespaceMap; + +struct EventList +{ + std::vector<Event> maEvents; + bool mbIsAttributesEmpty; +}; + +enum class CallbackType { START_ELEMENT, END_ELEMENT, CHARACTERS, PROCESSING_INSTRUCTION, DONE, EXCEPTION }; + +struct Event +{ + CallbackType maType; + sal_Int32 mnElementToken; + OUString msNamespace; + OUString msElementName; + rtl::Reference< FastAttributeList > mxAttributes; + rtl::Reference< FastAttributeList > mxDeclAttributes; + OUString msChars; +}; + +struct NameWithToken +{ + OUString msName; + sal_Int32 mnToken; + + NameWithToken(OUString sName, sal_Int32 nToken) : + msName(std::move(sName)), mnToken(nToken) {} +}; + +struct SaxContext +{ + Reference< XFastContextHandler > mxContext; + sal_Int32 mnElementToken; + std::optional<OUString> moNamespace; + std::optional<OUString> moElementName; + + SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ): + mnElementToken(nElementToken) + { + if (nElementToken == FastToken::DONTKNOW) + { + moNamespace = aNamespace; + moElementName = aElementName; + } + } +}; + +struct ParserData +{ + css::uno::Reference< css::xml::sax::XFastDocumentHandler > mxDocumentHandler; + rtl::Reference<FastTokenHandlerBase> mxTokenHandler; + css::uno::Reference< css::xml::sax::XErrorHandler > mxErrorHandler; + css::uno::Reference< css::xml::sax::XFastNamespaceHandler >mxNamespaceHandler; + + ParserData(); +}; + +struct NamespaceDefine +{ + OString maPrefix; + 
sal_Int32 mnToken; + OUString maNamespaceURL; + + NamespaceDefine( OString aPrefix, sal_Int32 nToken, OUString aNamespaceURL ) + : maPrefix(std::move( aPrefix )), mnToken( nToken ), maNamespaceURL(std::move( aNamespaceURL )) {} + NamespaceDefine() : mnToken(-1) {} +}; + +// Entity binds all information needed for a single file | single call of parseStream +struct Entity : public ParserData +{ + // Amount of work producer sends to consumer in one iteration: + static const size_t mnEventListSize = 1000; + + // unique for each Entity instance: + + // Number of valid events in mxProducedEvents: + size_t mnProducedEventsSize; + std::optional<EventList> mxProducedEvents; + std::queue<EventList> maPendingEvents; + std::queue<EventList> maUsedEvents; + std::mutex maEventProtector; + + static const size_t mnEventLowWater = 4; + static const size_t mnEventHighWater = 8; + osl::Condition maConsumeResume; + osl::Condition maProduceResume; + // Event we use to store data if threading is disabled: + Event maSharedEvent; + + // copied in copy constructor: + + // Allow to disable threading for small documents: + bool mbEnableThreads; + css::xml::sax::InputSource maStructSource; + xmlParserCtxtPtr mpParser; + ::sax_expatwrap::XMLFile2UTFConverter maConverter; + + // Exceptions cannot be thrown through the C-XmlParser (possible + // resource leaks), therefore any exception thrown by a UNO callback + // must be saved somewhere until the C-XmlParser is stopped. + css::uno::Any maSavedException; + std::mutex maSavedExceptionMutex; + void saveException( const Any & e ); + // Thread-safe check if maSavedException has value + bool hasException(); + void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, + bool mbDuringParse ); + + std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack; + /* Context for main thread consuming events. 
+ * startElement() stores the data, which characters() and endElement() uses + */ + std::stack< SaxContext, std::vector<SaxContext> > maContextStack; + // Determines which elements of maNamespaceDefines are valid in current context + std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount; + std::vector< NamespaceDefine > maNamespaceDefines; + + explicit Entity( const ParserData& rData ); + Entity( const Entity& rEntity ) = delete; + Entity& operator=( const Entity& rEntity ) = delete; + void startElement( Event const *pEvent ); + void characters( const OUString& sChars ); + void endElement(); + void processingInstruction( const OUString& rTarget, const OUString& rData ); + EventList& getEventList(); + Event& getEvent( CallbackType aType ); +}; + +// Stuff for custom entity names +struct ReplacementPair +{ + OUString name; + OUString replacement; +}; +inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs) +{ + return lhs.name < rhs.name; +} +inline bool operator<(const ReplacementPair& lhs, const char* rhs) +{ + return lhs.name.compareToAscii(rhs) < 0; +} + +} // namespace + +namespace sax_fastparser { + +class FastSaxParserImpl +{ +public: + explicit FastSaxParserImpl(); + ~FastSaxParserImpl(); + +private: + std::vector<ReplacementPair> m_Replacements; + std::vector<xmlEntityPtr> m_TemporalEntities; + +public: + // XFastParser + /// @throws css::xml::sax::SAXException + /// @throws css::io::IOException + /// @throws css::uno::RuntimeException + void parseStream( const css::xml::sax::InputSource& aInputSource ); + /// @throws css::uno::RuntimeException + void setFastDocumentHandler( const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& Handler ); + /// @throws css::uno::RuntimeException + void setTokenHandler( const css::uno::Reference< css::xml::sax::XFastTokenHandler >& Handler ); + /// @throws css::lang::IllegalArgumentException + /// @throws css::uno::RuntimeException + void registerNamespace( const OUString& 
NamespaceURL, sal_Int32 NamespaceToken ); + /// @throws css::lang::IllegalArgumentException + /// @throws css::uno::RuntimeException + OUString const & getNamespaceURL( std::u16string_view rPrefix ); + /// @throws css::uno::RuntimeException + void setErrorHandler( const css::uno::Reference< css::xml::sax::XErrorHandler >& Handler ); + /// @throws css::uno::RuntimeException + void setNamespaceHandler( const css::uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler); + // Fake DTD file + void setCustomEntityNames( + const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements); + + // called by the C callbacks of the expat parser + void callbackStartElement( const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, + int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes ); + void callbackEndElement(); + void callbackCharacters( const xmlChar* s, int nLen ); + void callbackProcessingInstruction( const xmlChar *target, const xmlChar *data ); + xmlEntityPtr callbackGetEntity( const xmlChar *name ); + + void pushEntity(const ParserData&, xml::sax::InputSource const&); + void popEntity(); + Entity& getEntity() { return *mpTop; } + void parse(); + void produce( bool bForceFlush = false ); + bool m_bIgnoreMissingNSDecl; + bool m_bDisableThreadedParser; + +private: + bool consume(EventList&); + void deleteUsedEvents(); + void sendPendingCharacters(); + void addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes); + + sal_Int32 GetToken( const xmlChar* pName ); + /// @throws css::xml::sax::SAXException + sal_Int32 GetTokenWithPrefix( const xmlChar* pPrefix, const xmlChar* pName ); + /// @throws css::xml::sax::SAXException + OUString const & GetNamespaceURL( std::string_view rPrefix ); + sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL ); + sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, 
const xmlChar* pName ); + void DefineNamespace( const OString& rPrefix, const OUString& namespaceURL ); + +private: + std::mutex maMutex; ///< Protecting whole parseStream() execution + ::rtl::Reference< FastLocatorImpl > mxDocumentLocator; + NamespaceMap maNamespaceMap; + + ParserData maData; /// Cached parser configuration for next call of parseStream(). + + Entity *mpTop; /// std::stack::top() is amazingly slow => cache this. + std::stack< Entity > maEntities; /// Entity stack for each call of parseStream(). + std::vector<char> pendingCharacters; /// Data from characters() callback that needs to be sent. +}; + +} // namespace sax_fastparser + +namespace { + +class ParserThread: public salhelper::Thread +{ + FastSaxParserImpl *mpParser; +public: + explicit ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {} +private: + virtual void execute() override + { + try + { + mpParser->parse(); + } + catch (...) + { + Entity &rEntity = mpParser->getEntity(); + rEntity.getEvent( CallbackType::EXCEPTION ); + mpParser->produce( true ); + } + } +}; + +extern "C" { + +static void call_callbackStartElement(void *userData, const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, + int numNamespaces, const xmlChar** namespaces, int numAttributes, int /*defaultedAttributes*/, const xmlChar **attributes) +{ + FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); + pFastParser->callbackStartElement( localName, prefix, URI, numNamespaces, namespaces, numAttributes, attributes ); +} + +static void call_callbackEndElement(void *userData, const xmlChar* /*localName*/, const xmlChar* /*prefix*/, const xmlChar* /*URI*/) +{ + FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); + pFastParser->callbackEndElement(); +} + +static void call_callbackCharacters( void *userData , const xmlChar *s , int nLen ) +{ + FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); + 
pFastParser->callbackCharacters( s, nLen ); +} + +static void call_callbackProcessingInstruction( void *userData, const xmlChar *target, const xmlChar *data ) +{ + FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); + pFastParser->callbackProcessingInstruction( target, data ); +} + +static xmlEntityPtr call_callbackGetEntity( void *userData, const xmlChar *name) +{ + FastSaxParserImpl* pFastParser = static_cast<FastSaxParserImpl*>( userData ); + return pFastParser->callbackGetEntity( name ); +} + +} + +class FastLocatorImpl : public WeakImplHelper< XLocator > +{ +public: + explicit FastLocatorImpl(FastSaxParserImpl *p) : mpParser(p) {} + + void dispose() { mpParser = nullptr; } + /// @throws RuntimeException + void checkDispose() const { if( !mpParser ) throw DisposedException(); } + + //XLocator + virtual sal_Int32 SAL_CALL getColumnNumber() override; + virtual sal_Int32 SAL_CALL getLineNumber() override; + virtual OUString SAL_CALL getPublicId() override; + virtual OUString SAL_CALL getSystemId() override; + +private: + FastSaxParserImpl *mpParser; +}; + +sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber() +{ + checkDispose(); + return xmlSAX2GetColumnNumber( mpParser->getEntity().mpParser ); +} + +sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber() +{ + checkDispose(); + return xmlSAX2GetLineNumber( mpParser->getEntity().mpParser ); +} + +OUString SAL_CALL FastLocatorImpl::getPublicId() +{ + checkDispose(); + return mpParser->getEntity().maStructSource.sPublicId; +} + +OUString SAL_CALL FastLocatorImpl::getSystemId() +{ + checkDispose(); + return mpParser->getEntity().maStructSource.sSystemId; +} + +ParserData::ParserData() +{} + +Entity::Entity(const ParserData& rData) + : ParserData(rData) + , mnProducedEventsSize(0) + , mbEnableThreads(false) + , mpParser(nullptr) +{ +} + +void Entity::startElement( Event const *pEvent ) +{ + const sal_Int32& nElementToken = pEvent->mnElementToken; + const OUString& aNamespace = 
pEvent->msNamespace; + const OUString& aElementName = pEvent->msElementName; + + // Use un-wrapped pointers to avoid significant acquire/release overhead + XFastContextHandler *pParentContext = nullptr; + if( !maContextStack.empty() ) + { + pParentContext = maContextStack.top().mxContext.get(); + if( !pParentContext ) + { + maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) ); + return; + } + } + + maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) ); + + try + { + const Reference< XFastAttributeList > & xAttr( pEvent->mxAttributes ); + Reference< XFastContextHandler > xContext; + + if ( mxNamespaceHandler.is() ) + { + const Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes(); + for (const auto& rNSDeclAttrib : NSDeclAttribs) + { + mxNamespaceHandler->registerNamespace( rNSDeclAttrib.Name, rNSDeclAttrib.Value ); + } + } + + if( nElementToken == FastToken::DONTKNOW ) + { + if( pParentContext ) + xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); + else if( mxDocumentHandler.is() ) + xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); + + if( xContext.is() ) + { + xContext->startUnknownElement( aNamespace, aElementName, xAttr ); + } + } + else + { + if( pParentContext ) + xContext = pParentContext->createFastChildContext( nElementToken, xAttr ); + else if( mxDocumentHandler.is() ) + xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr ); + + if( xContext.is() ) + xContext->startFastElement( nElementToken, xAttr ); + } + // swap the reference we own in to avoid referencing thrash. + maContextStack.top().mxContext = std::move( xContext ); + } + catch (...) + { + saveException( ::cppu::getCaughtException() ); + } +} + +void Entity::characters( const OUString& sChars ) +{ + if (maContextStack.empty()) + { + // Malformed XML stream !? 
+ return; + } + + XFastContextHandler * pContext( maContextStack.top().mxContext.get() ); + if( pContext ) try + { + pContext->characters( sChars ); + } + catch (...) + { + saveException( ::cppu::getCaughtException() ); + } +} + +void Entity::endElement() +{ + if (maContextStack.empty()) + { + // Malformed XML stream !? + return; + } + + const SaxContext& aContext = maContextStack.top(); + XFastContextHandler* pContext( aContext.mxContext.get() ); + if( pContext ) + try + { + sal_Int32 nElementToken = aContext.mnElementToken; + if( nElementToken != FastToken::DONTKNOW ) + pContext->endFastElement( nElementToken ); + else + pContext->endUnknownElement( *aContext.moNamespace, *aContext.moElementName ); + } + catch (...) + { + saveException( ::cppu::getCaughtException() ); + } + maContextStack.pop(); +} + +void Entity::processingInstruction( const OUString& rTarget, const OUString& rData ) +{ + if( mxDocumentHandler.is() ) try + { + mxDocumentHandler->processingInstruction( rTarget, rData ); + } + catch (...) 
+ { + saveException( ::cppu::getCaughtException() ); + } +} + +EventList& Entity::getEventList() +{ + if (!mxProducedEvents) + { + std::unique_lock aGuard(maEventProtector); + if (!maUsedEvents.empty()) + { + mxProducedEvents = std::move(maUsedEvents.front()); + maUsedEvents.pop(); + aGuard.unlock(); // unlock + mnProducedEventsSize = 0; + } + if (!mxProducedEvents) + { + mxProducedEvents.emplace(); + mxProducedEvents->maEvents.resize(mnEventListSize); + mxProducedEvents->mbIsAttributesEmpty = false; + mnProducedEventsSize = 0; + } + } + return *mxProducedEvents; +} + +Event& Entity::getEvent( CallbackType aType ) +{ + if (!mbEnableThreads) + return maSharedEvent; + + EventList& rEventList = getEventList(); + if (mnProducedEventsSize == rEventList.maEvents.size()) + { + SAL_WARN_IF(!maSavedException.hasValue(), "sax", + "Event vector should only exceed " << mnEventListSize << + " temporarily while an exception is pending"); + rEventList.maEvents.resize(mnProducedEventsSize + 1); + } + Event& rEvent = rEventList.maEvents[mnProducedEventsSize++]; + rEvent.maType = aType; + return rEvent; +} + +OUString lclGetErrorMessage( xmlParserCtxtPtr ctxt, std::u16string_view sSystemId, sal_Int32 nLine ) +{ + const char* pMessage; + const xmlError* error = xmlCtxtGetLastError( ctxt ); + if( error && error->message ) + pMessage = error->message; + else + pMessage = "unknown error"; + return OUString::Concat("[") + sSystemId + " line " + OUString::number(nLine) + "]: " + + OUString(pMessage, strlen(pMessage), RTL_TEXTENCODING_ASCII_US); +} + +// throw an exception, but avoid callback if +// during a threaded produce +void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, + bool mbDuringParse ) +{ + // Error during parsing ! 
+ Any savedException; + { + std::scoped_lock g(maSavedExceptionMutex); + if (maSavedException.hasValue()) + { + savedException.setValue(&maSavedException, cppu::UnoType<decltype(maSavedException)>::get()); + } + } + SAXParseException aExcept( + lclGetErrorMessage( mpParser, + xDocumentLocator->getSystemId(), + xDocumentLocator->getLineNumber() ), + Reference< XInterface >(), + savedException, + xDocumentLocator->getPublicId(), + xDocumentLocator->getSystemId(), + xDocumentLocator->getLineNumber(), + xDocumentLocator->getColumnNumber() + ); + + // error handler is set, it may throw the exception + if( !mbDuringParse || !mbEnableThreads ) + { + if (mxErrorHandler.is() ) + mxErrorHandler->fatalError( Any( aExcept ) ); + } + + // error handler has not thrown, but parsing must stop => throw ourselves + throw aExcept; +} + +// In the single threaded case we emit events via our C +// callbacks, so any exception caught must be queued up until +// we can safely re-throw it from our C++ parent of parse() + +// If multi-threaded, we need to push an EXCEPTION event, at +// which point we transfer ownership of maSavedException to +// the consuming thread. +void Entity::saveException( const Any & e ) +{ + // fdo#81214 - allow the parser to run on after an exception, + // unexpectedly some 'startElements' produce a UNO_QUERY_THROW + // for XComponent; and yet expect to continue parsing. 
+ SAL_WARN("sax", "Unexpected exception from XML parser " << exceptionToString(e)); + std::scoped_lock g(maSavedExceptionMutex); + if (maSavedException.hasValue()) + { + SAL_INFO("sax.fastparser", "discarding exception, already have one"); + } + else + { + maSavedException = e; + } +} + +bool Entity::hasException() +{ + std::scoped_lock g(maSavedExceptionMutex); + return maSavedException.hasValue(); +} + +} // namespace + +namespace sax_fastparser { + +FastSaxParserImpl::FastSaxParserImpl() : + m_bIgnoreMissingNSDecl(false), + m_bDisableThreadedParser(false), + mpTop(nullptr) +{ + mxDocumentLocator.set( new FastLocatorImpl( this ) ); +} + +FastSaxParserImpl::~FastSaxParserImpl() +{ + if( mxDocumentLocator.is() ) + mxDocumentLocator->dispose(); + for (auto& entity : m_TemporalEntities) + { + if (!entity) + continue; + xmlNodePtr pPtr = reinterpret_cast<xmlNodePtr>(entity); + xmlUnlinkNode(pPtr); + xmlFreeNode(pPtr); + } +} + +void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const OUString& namespaceURL ) +{ + Entity& rEntity = getEntity(); + assert(!rEntity.maNamespaceCount.empty()); // need a context! + + sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++; + if( rEntity.maNamespaceDefines.size() <= nOffset ) + rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 ); + + rEntity.maNamespaceDefines[nOffset] = NamespaceDefine( rPrefix, GetNamespaceToken( namespaceURL ), namespaceURL ); +} + +sal_Int32 FastSaxParserImpl::GetToken(const xmlChar* pName) +{ + return FastTokenHandlerBase::getTokenFromChars( getEntity(). 
mxTokenHandler.get(), + XML_CAST( pName ) ); // uses utf-8 +} + +sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, const xmlChar* pName ) +{ + Entity& rEntity = getEntity(); + if (rEntity.maNamespaceCount.empty()) + return FastToken::DONTKNOW; + + std::string_view sPrefix(XML_CAST(pPrefix)); + sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); + while( nNamespace-- ) + { + const auto & rNamespaceDefine = rEntity.maNamespaceDefines[nNamespace]; + if( rNamespaceDefine.maPrefix == sPrefix ) + return GetTokenWithContextNamespace(rNamespaceDefine.mnToken, pName); + } + + if (!m_bIgnoreMissingNSDecl) + throw SAXException("No namespace defined for " + OStringToOUString(sPrefix, + RTL_TEXTENCODING_UTF8), {}, {}); + + return FastToken::DONTKNOW; +} + +sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL ) +{ + NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) ); + if( aIter != maNamespaceMap.end() ) + return (*aIter).second; + else + return FastToken::DONTKNOW; +} + +OUString const & FastSaxParserImpl::GetNamespaceURL( std::string_view rPrefix ) +{ + Entity& rEntity = getEntity(); + if( !rEntity.maNamespaceCount.empty() ) + { + sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); + while( nNamespace-- ) + if( rEntity.maNamespaceDefines[nNamespace].maPrefix == rPrefix ) + return rEntity.maNamespaceDefines[nNamespace].maNamespaceURL; + } + + throw SAXException("No namespace defined for " + OUString::fromUtf8(rPrefix), + Reference< XInterface >(), Any()); +} + +sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const xmlChar* pName ) +{ + if( nNamespaceToken != FastToken::DONTKNOW ) + { + sal_Int32 nNameToken = GetToken( pName ); + if( nNameToken != FastToken::DONTKNOW ) + return nNamespaceToken | nNameToken; + } + + return FastToken::DONTKNOW; +} + +namespace +{ + class ParserCleanup + { + private: + FastSaxParserImpl& m_rParser; + Entity& m_rEntity; + 
rtl::Reference<ParserThread> m_xParser; + public: + ParserCleanup(FastSaxParserImpl& rParser, Entity& rEntity) + : m_rParser(rParser) + , m_rEntity(rEntity) + { + } + ~ParserCleanup() + { + if (m_rEntity.mpParser) + { + if (m_rEntity.mpParser->myDoc) + xmlFreeDoc(m_rEntity.mpParser->myDoc); + xmlFreeParserCtxt(m_rEntity.mpParser); + } + joinThread(); + m_rParser.popEntity(); + } + void setThread(const rtl::Reference<ParserThread> &xParser) + { + m_xParser = xParser; + } + void joinThread() + { + if (m_xParser.is()) + { + rtl::Reference<ParserThread> xToJoin = m_xParser; + m_xParser.clear(); + xToJoin->join(); + } + } + }; +} +/*************** +* +* parseStream does Parser-startup initializations. The FastSaxParser::parse() method does +* the file-specific initialization work. (During a parser run, external files may be opened) +* +****************/ +void FastSaxParserImpl::parseStream(const InputSource& rStructSource) +{ + xmlInitParser(); + + // Only one text at one time + std::unique_lock guard( maMutex ); + + pushEntity(maData, rStructSource); + Entity& rEntity = getEntity(); + ParserCleanup aEnsureFree(*this, rEntity); + + // start the document + if( rEntity.mxDocumentHandler.is() ) + { + rEntity.mxDocumentHandler->setDocumentLocator( mxDocumentLocator ); + rEntity.mxDocumentHandler->startDocument(); + } + +#ifdef EMSCRIPTEN + rEntity.mbEnableThreads = false; +#else + if (!getenv("SAX_DISABLE_THREADS") && !m_bDisableThreadedParser) + { + Reference<css::io::XSeekable> xSeekable(rEntity.maStructSource.aInputStream, UNO_QUERY); + // available() is not __really__ relevant here, but leave it in as a heuristic for non-seekable streams + rEntity.mbEnableThreads = (xSeekable.is() && xSeekable->getLength() > 10000) + || (rEntity.maStructSource.aInputStream->available() > 10000); + } +#endif + + if (rEntity.mbEnableThreads) + { + rtl::Reference<ParserThread> xParser = new ParserThread(this); + xParser->launch(); + aEnsureFree.setThread(xParser); + bool done = false; + do 
{ + rEntity.maConsumeResume.wait(); + rEntity.maConsumeResume.reset(); + + std::unique_lock aGuard(rEntity.maEventProtector); + while (!rEntity.maPendingEvents.empty()) + { + if (rEntity.maPendingEvents.size() <= Entity::mnEventLowWater) + rEntity.maProduceResume.set(); // start producer again + + EventList aEventList = std::move(rEntity.maPendingEvents.front()); + rEntity.maPendingEvents.pop(); + aGuard.unlock(); // unlock + + if (!consume(aEventList)) + done = true; + + aGuard.lock(); // lock + + if ( rEntity.maPendingEvents.size() <= Entity::mnEventLowWater ) + { + aGuard.unlock(); + for (auto& rEvent : aEventList.maEvents) + { + if (rEvent.mxAttributes.is()) + { + rEvent.mxAttributes->clear(); + if( rEntity.mxNamespaceHandler.is() ) + rEvent.mxDeclAttributes->clear(); + } + aEventList.mbIsAttributesEmpty = true; + } + aGuard.lock(); + } + + rEntity.maUsedEvents.push(std::move(aEventList)); + } + } while (!done); + aEnsureFree.joinThread(); + deleteUsedEvents(); + + // callbacks used inside XML_Parse may have caught an exception + // No need to lock maSavedExceptionMutex here because parser + // thread is joined. 
// NOTE(review): this chunk opens inside FastSaxParserImpl::parseStream(); only the tail of
// that function is visible here, so it is reproduced unchanged and left undocumented.
        if( rEntity.maSavedException.hasValue() )
            rEntity.throwException( mxDocumentLocator, true );
    }
    else
    {
        parse();
    }

    // finish document
    if( rEntity.mxDocumentHandler.is() )
    {
        rEntity.mxDocumentHandler->endDocument();
    }
}

/** Stores the document handler in the parser-wide data (maData). */
void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler )
{
    maData.mxDocumentHandler = Handler;
}

/** Stores the token handler in the parser-wide data (maData).

    The handler is down-cast to FastTokenHandlerBase; in debug builds a handler of any
    other type trips the assertion, in release builds the stored pointer is the (possibly
    null) result of the dynamic_cast.
 */
void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler )
{
    assert( dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ) && "we expect this handler to be a subclass of FastTokenHandlerBase" );
    maData.mxTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() );
}

/** Maps a namespace URL to a namespace token.

    @throws IllegalArgumentException
        if NamespaceToken is below FastToken::NAMESPACE, or if GetNamespaceToken() already
        knows the URL.
 */
void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken )
{
    if( NamespaceToken < FastToken::NAMESPACE )
        throw IllegalArgumentException("Invalid namespace token " + OUString::number(NamespaceToken), css::uno::Reference<css::uno::XInterface >(), 0);

    if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
    {
        maNamespaceMap[ NamespaceURL ] = NamespaceToken;
        return;
    }
    throw IllegalArgumentException("namespace URL is already registered: " + NamespaceURL, css::uno::Reference<css::uno::XInterface >(), 0);
}

/** Looks up the namespace URL for a prefix via GetNamespaceURL().

    Any css::uno::Exception raised by the lookup is swallowed and replaced by an
    IllegalArgumentException without message.
 */
OUString const & FastSaxParserImpl::getNamespaceURL( std::u16string_view rPrefix )
{
    try
    {
        return GetNamespaceURL( OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 ) );
    }
    catch (const Exception&)
    {
    }
    throw IllegalArgumentException();
}

/** Stores the error handler in the parser-wide data (maData). */
void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler)
{
    maData.mxErrorHandler = Handler;
}

/** Stores the namespace handler in the parser-wide data (maData). */
void FastSaxParserImpl::setNamespaceHandler( const Reference< XFastNamespaceHandler >& Handler )
{
    maData.mxNamespaceHandler = Handler;
}

/** Replaces the custom entity table with the given (name, replacement) pairs.

    The table is sorted afterwards; callbackGetEntity() searches it with std::lower_bound,
    which needs a sorted range.
 */
void FastSaxParserImpl::setCustomEntityNames(
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
{
    m_Replacements.resize(replacements.size());
    for (size_t i = 0; i < replacements.size(); ++i)
    {
        m_Replacements[i].name = replacements[i].First;
        m_Replacements[i].replacement = replacements[i].Second;
    }
    if (m_Replacements.size() > 1)
        std::sort(m_Replacements.begin(), m_Replacements.end());
}

/** Drains rEntity.maUsedEvents, destroying each EventList while the mutex is released.

    The unlock inside the inner scope and the re-lock after it are deliberate: the moved-out
    list goes out of scope between the two calls, so its destructor runs unlocked.
 */
void FastSaxParserImpl::deleteUsedEvents()
{
    Entity& rEntity = getEntity();
    std::unique_lock aGuard(rEntity.maEventProtector);

    while (!rEntity.maUsedEvents.empty())
    {
        { // the block makes sure that aEventList is destructed outside the lock
            EventList aEventList = std::move(rEntity.maUsedEvents.front());
            rEntity.maUsedEvents.pop();

            aGuard.unlock(); // unlock
        }

        aGuard.lock(); // lock
    }
}

/** Hands the currently produced event list over to the pending queue.

    Does nothing unless bForceFlush is set or mnProducedEventsSize has reached
    Entity::mnEventListSize. While the pending queue holds at least
    Entity::mnEventHighWater lists, the caller waits on maProduceResume with the mutex
    released. After queueing, maConsumeResume is set.
 */
void FastSaxParserImpl::produce( bool bForceFlush )
{
    Entity& rEntity = getEntity();
    if (!(bForceFlush ||
          rEntity.mnProducedEventsSize >= Entity::mnEventListSize))
        return;

    std::unique_lock aGuard(rEntity.maEventProtector);

    while (rEntity.maPendingEvents.size() >= Entity::mnEventHighWater)
    { // pause parsing for a bit
        aGuard.unlock(); // unlock
        rEntity.maProduceResume.wait();
        rEntity.maProduceResume.reset();
        aGuard.lock(); // lock
    }

    rEntity.maPendingEvents.push(std::move(*rEntity.mxProducedEvents));
    rEntity.mxProducedEvents.reset();
    assert(!rEntity.mxProducedEvents);

    aGuard.unlock(); // unlock

    rEntity.maConsumeResume.set();
}

/** Dispatches every event of rEventList to the current entity.

    @return false when a DONE event (or an unexpected event type) is met, true when all
            events of the list were dispatched.
 */
bool FastSaxParserImpl::consume(EventList& rEventList)
{
    Entity& rEntity = getEntity();
    rEventList.mbIsAttributesEmpty = false;
    for (auto& rEvent : rEventList.maEvents)
    {
        switch (rEvent.maType)
        {
            case CallbackType::START_ELEMENT:
                rEntity.startElement( &rEvent );
                break;
            case CallbackType::END_ELEMENT:
                rEntity.endElement();
                break;
            case CallbackType::CHARACTERS:
                rEntity.characters( rEvent.msChars );
                break;
            case CallbackType::PROCESSING_INSTRUCTION:
                rEntity.processingInstruction(
                    rEvent.msNamespace, rEvent.msElementName ); // ( target, data )
                break;
            case CallbackType::DONE:
                return false;
            case CallbackType::EXCEPTION:
                rEntity.throwException( mxDocumentLocator, false );
                [[fallthrough]]; // avoid unreachable code warning with some compilers
            default:
                assert(false);
                return false;
        }
    }
    return true;
}

/** Pushes a new entity built from rEntityData and makes it the current one (mpTop).

    The input stream of rSource is attached to the entity's converter; a non-empty
    sEncoding is passed on to the converter as well.

    @throws SAXException if rSource carries no input stream.
 */
void FastSaxParserImpl::pushEntity(const ParserData& rEntityData,
        xml::sax::InputSource const& rSource)
{
    if (!rSource.aInputStream.is())
        throw SAXException("No input source", Reference<XInterface>(), Any());

    maEntities.emplace(rEntityData);
    mpTop = &maEntities.top();

    mpTop->maStructSource = rSource;

    mpTop->maConverter.setInputStream(mpTop->maStructSource.aInputStream);
    if (!mpTop->maStructSource.sEncoding.isEmpty())
    {
        mpTop->maConverter.setEncoding(OUStringToOString(mpTop->maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US));
    }
}

/** Pops the current entity; mpTop then points at the new top, or is null if none is left. */
void FastSaxParserImpl::popEntity()
{
    maEntities.pop();
    mpTop = !maEntities.empty() ? &maEntities.top() : nullptr;
}

// starts parsing with actual parser !
+void FastSaxParserImpl::parse() +{ + const int BUFFER_SIZE = 16 * 1024; + Sequence< sal_Int8 > seqOut( BUFFER_SIZE ); + + Entity& rEntity = getEntity(); + + // set all necessary C-Callbacks + static xmlSAXHandler callbacks; + callbacks.startElementNs = call_callbackStartElement; + callbacks.endElementNs = call_callbackEndElement; + callbacks.characters = call_callbackCharacters; + callbacks.processingInstruction = call_callbackProcessingInstruction; + callbacks.getEntity = call_callbackGetEntity; + callbacks.initialized = XML_SAX2_MAGIC; + int nRead = 0; + do + { + nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE ); + if( nRead <= 0 ) + { + if( rEntity.mpParser != nullptr ) + { + if( xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), 0, 1 ) != XML_ERR_OK ) + rEntity.throwException( mxDocumentLocator, true ); + if (rEntity.hasException()) + rEntity.throwException(mxDocumentLocator, true); + } + break; + } + + bool bContinue = true; + if( rEntity.mpParser == nullptr ) + { + // create parser with proper encoding (needs the first chunk of data) + rEntity.mpParser = xmlCreatePushParserCtxt( &callbacks, this, + reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, nullptr ); + if( !rEntity.mpParser ) + throw SAXException("Couldn't create parser", Reference< XInterface >(), Any() ); + + // Tell libxml2 parser to decode entities in attribute values. + // Also allow XML attribute values which are larger than 10MB, because this used to work + // with expat. 
+ // coverity[unsafe_xml_parse_config] - entity support is required + xmlCtxtUseOptions(rEntity.mpParser, XML_PARSE_NOENT | XML_PARSE_HUGE); + } + else + { + bContinue = xmlParseChunk( rEntity.mpParser, reinterpret_cast<const char*>(seqOut.getConstArray()), nRead, 0 ) + == XML_ERR_OK; + } + + // callbacks used inside XML_Parse may have caught an exception + if (!bContinue) + { + rEntity.throwException( mxDocumentLocator, true ); + } + if (rEntity.hasException()) + { + rEntity.throwException( mxDocumentLocator, true ); + } + } while( nRead > 0 ); + rEntity.getEvent( CallbackType::DONE ); + if( rEntity.mbEnableThreads ) + produce( true ); +} + +// The C-Callbacks +void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI, + int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes) +{ + if (!pendingCharacters.empty()) + sendPendingCharacters(); + Entity& rEntity = getEntity(); + if( rEntity.maNamespaceCount.empty() ) + { + rEntity.maNamespaceCount.push(0); + DefineNamespace( "xml"_ostr, "http://www.w3.org/XML/1998/namespace"); + } + else + { + rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() ); + } + + // create attribute map and process namespace instructions + Event& rEvent = rEntity.getEvent( CallbackType::START_ELEMENT ); + bool bIsAttributesEmpty = false; + if ( rEntity.mbEnableThreads ) + bIsAttributesEmpty = rEntity.getEventList().mbIsAttributesEmpty; + + if (rEvent.mxAttributes.is()) + { + if( !bIsAttributesEmpty ) + rEvent.mxAttributes->clear(); + } + else + rEvent.mxAttributes.set( + new FastAttributeList( rEntity.mxTokenHandler.get() ) ); + + if( rEntity.mxNamespaceHandler.is() ) + { + if (rEvent.mxDeclAttributes.is()) + { + if( !bIsAttributesEmpty ) + rEvent.mxDeclAttributes->clear(); + } + else + rEvent.mxDeclAttributes.set( + new FastAttributeList( rEntity.mxTokenHandler.get() ) ); + } + + OUString sNamespace; + sal_Int32 nNamespaceToken = 
FastToken::DONTKNOW; + if (!rEntity.maNamespaceStack.empty()) + { + sNamespace = rEntity.maNamespaceStack.top().msName; + nNamespaceToken = rEntity.maNamespaceStack.top().mnToken; + } + + try + { + /* #158414# Each element may define new namespaces, also for attributes. + First, process all namespaces, second, process the attributes after namespaces + have been initialized. */ + + // #158414# first: get namespaces + for (int i = 0; i < numNamespaces * 2; i += 2) + { + // namespaces[] is (prefix/URI) + if( namespaces[ i ] != nullptr ) + { + OString aPrefix( XML_CAST( namespaces[ i ] )); + OUString namespaceURL( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); + NormalizeURI( namespaceURL ); + DefineNamespace(aPrefix, namespaceURL); + if( rEntity.mxNamespaceHandler.is() ) + rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + } + else + { + // default namespace + sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ); + NormalizeURI( sNamespace ); + nNamespaceToken = GetNamespaceToken( sNamespace ); + if( rEntity.mxNamespaceHandler.is() ) + rEvent.mxDeclAttributes->addUnknown( ""_ostr, OString( XML_CAST( namespaces[ i + 1 ] ) ) ); + } + } + + if ( rEntity.mxTokenHandler.is() ) + { + // #158414# second: fill attribute list with other attributes + rEvent.mxAttributes->reserve( numAttributes ); + for (int i = 0; i < numAttributes * 5; i += 5) + { + // attributes[] is ( localname / prefix / nsURI / valueBegin / valueEnd ) + if( attributes[ i + 1 ] != nullptr ) + { + sal_Int32 nAttributeToken = GetTokenWithPrefix(attributes[ i + 1 ], attributes[ i ]); + if( nAttributeToken != FastToken::DONTKNOW ) + rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) ); + else + addUnknownElementWithPrefix(attributes, 
i, rEvent.mxAttributes); + } + else + { + sal_Int32 nAttributeToken = GetToken(attributes[ i ]); + if( nAttributeToken != FastToken::DONTKNOW ) + rEvent.mxAttributes->add( nAttributeToken, std::string_view(XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ]) ); + else + { + SAL_WARN("xmloff", "unknown attribute " << XML_CAST( attributes[ i ] ) << "=" << + OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ), + OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + } + } + } + + if( prefix != nullptr ) + rEvent.mnElementToken = GetTokenWithPrefix(prefix, localName); + else if( !sNamespace.isEmpty() ) + rEvent.mnElementToken = GetTokenWithContextNamespace(nNamespaceToken, localName); + else + rEvent.mnElementToken = GetToken(localName); + } + else + { + for (int i = 0; i < numAttributes * 5; i += 5) + { + if( attributes[ i + 1 ] != nullptr ) + addUnknownElementWithPrefix(attributes, i, rEvent.mxAttributes); + else + rEvent.mxAttributes->addUnknown( XML_CAST( attributes[ i ] ), + OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + } + + rEvent.mnElementToken = FastToken::DONTKNOW; + } + + if( rEvent.mnElementToken == FastToken::DONTKNOW ) + { + OUString aElementPrefix; + if( prefix != nullptr ) + { + aElementPrefix = OUString( XML_CAST( prefix ), strlen( XML_CAST( prefix )), RTL_TEXTENCODING_UTF8 ); + if ( URI != nullptr ) + sNamespace = OUString( XML_CAST( URI ), strlen( XML_CAST( URI )), RTL_TEXTENCODING_UTF8 ); + else if ( m_bIgnoreMissingNSDecl ) + sNamespace.clear(); + else + throw SAXException("No namespace defined for " + aElementPrefix, {}, {}); + nNamespaceToken = GetNamespaceToken( sNamespace ); + } + OUString aElementLocalName( XML_CAST( localName ), strlen( XML_CAST( localName )), RTL_TEXTENCODING_UTF8 ); + rEvent.msNamespace = sNamespace; + if( 
aElementPrefix.isEmpty() ) + rEvent.msElementName = std::move(aElementLocalName); + else + rEvent.msElementName = aElementPrefix + ":" + aElementLocalName; + } + else // token is always preferred. + rEvent.msElementName.clear(); + + rEntity.maNamespaceStack.push( NameWithToken(sNamespace, nNamespaceToken) ); + if (rEntity.mbEnableThreads) + produce(); + else + { + SAL_INFO("sax.fastparser", " startElement line " << mxDocumentLocator->getLineNumber() << " column " << mxDocumentLocator->getColumnNumber() << " " << ( prefix ? XML_CAST(prefix) : "(null)" ) << ":" << localName); + rEntity.startElement( &rEvent ); + } + } + catch (...) + { + rEntity.saveException( ::cppu::getCaughtException() ); + } +} + +void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes, int i, rtl::Reference< FastAttributeList > const & xAttributes) +{ + OUString aNamespaceURI; + if ( !m_bIgnoreMissingNSDecl || attributes[i + 2] != nullptr ) + aNamespaceURI = OUString( XML_CAST( attributes[ i + 2 ] ), strlen( XML_CAST( attributes[ i + 2 ] )), RTL_TEXTENCODING_UTF8 ); + const OString& rPrefix = OString( XML_CAST( attributes[ i + 1 ] )); + const OString& rLocalName = OString( XML_CAST( attributes[ i ] )); + OString aQualifiedName = (rPrefix.isEmpty())? 
rLocalName : rPrefix + ":" + rLocalName; + xAttributes->addUnknown( aNamespaceURI, aQualifiedName, + OString( XML_CAST( attributes[ i + 3 ] ), attributes[ i + 4 ] - attributes[ i + 3 ] )); + SAL_INFO("xmloff", "unknown element " << aQualifiedName << " " << aNamespaceURI); +} + +void FastSaxParserImpl::callbackEndElement() +{ + if (!pendingCharacters.empty()) + sendPendingCharacters(); + Entity& rEntity = getEntity(); + SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount"); + if( !rEntity.maNamespaceCount.empty() ) + rEntity.maNamespaceCount.pop(); + + SAL_WARN_IF(rEntity.maNamespaceStack.empty(), "sax", "Empty NamespaceStack"); + if( !rEntity.maNamespaceStack.empty() ) + rEntity.maNamespaceStack.pop(); + + rEntity.getEvent( CallbackType::END_ELEMENT ); + if (rEntity.mbEnableThreads) + produce(); + else + rEntity.endElement(); +} + +void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen ) +{ + // SAX interface allows that the characters callback splits content of one XML node + // (e.g. because there's an entity that needs decoding), however for consumers it's + // simpler FastSaxParser's character callback provides the whole string at once, + // so merge data from possible multiple calls and send them at once (before the element + // ends or another one starts). + // + // We use a std::vector<char> to avoid calling into the OUString constructor more than once when + // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly + // often in writer documents. 
+ int nOriginalLen = pendingCharacters.size(); + pendingCharacters.resize(nOriginalLen + nLen); + memcpy(pendingCharacters.data() + nOriginalLen, s, nLen); +} + +void FastSaxParserImpl::sendPendingCharacters() +{ + Entity& rEntity = getEntity(); + OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 ); + if (rEntity.mbEnableThreads) + { + Event& rEvent = rEntity.getEvent( CallbackType::CHARACTERS ); + rEvent.msChars = std::move(sChars); + produce(); + } + else + rEntity.characters( sChars ); + pendingCharacters.resize(0); +} + +void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data ) +{ + if (!pendingCharacters.empty()) + sendPendingCharacters(); + Entity& rEntity = getEntity(); + Event& rEvent = rEntity.getEvent( CallbackType::PROCESSING_INSTRUCTION ); + + // This event is very rare, so no need to waste extra space for this + // Using namespace and element strings to be target and data in that order. + rEvent.msNamespace = OUString( XML_CAST( target ), strlen( XML_CAST( target ) ), RTL_TEXTENCODING_UTF8 ); + if ( data != nullptr ) + rEvent.msElementName = OUString( XML_CAST( data ), strlen( XML_CAST( data ) ), RTL_TEXTENCODING_UTF8 ); + else + rEvent.msElementName.clear(); + + if (rEntity.mbEnableThreads) + produce(); + else + rEntity.processingInstruction( rEvent.msNamespace, rEvent.msElementName ); +} + +xmlEntityPtr FastSaxParserImpl::callbackGetEntity( const xmlChar *name ) +{ + if( !name ) + return xmlGetPredefinedEntity(name); + const char* dname = XML_CAST(name); + int lname = strlen(dname); + if( lname == 0 ) + return xmlGetPredefinedEntity(name); + if (m_Replacements.size() > 0) + { + auto it = std::lower_bound(m_Replacements.begin(), m_Replacements.end(), dname); + if (it != m_Replacements.end() && it->name.compareToAscii(dname) == 0) + { + xmlEntityPtr entpt = xmlNewEntity( + nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, + 
BAD_CAST(OUStringToOString(it->replacement, RTL_TEXTENCODING_UTF8).getStr())); + m_TemporalEntities.push_back(entpt); + return entpt; + } + } + if( lname < 2 ) + return xmlGetPredefinedEntity(name); + if ( dname[0] == '#' ) + { + sal_uInt32 cval = 0; + if( dname[1] == 'x' || dname[1] == 'X' ) + { + if( lname < 3 ) + return xmlGetPredefinedEntity(name); + cval = static_cast<sal_uInt32>( strtoul( dname + 2, nullptr, 16 ) ); + if( cval == 0 ) + return xmlGetPredefinedEntity(name); + OUString vname( &cval, 1 ); + xmlEntityPtr entpt + = xmlNewEntity(nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, + BAD_CAST(OUStringToOString(vname, RTL_TEXTENCODING_UTF8).getStr())); + m_TemporalEntities.push_back(entpt); + return entpt; + } + else + { + cval = static_cast<sal_uInt32>( strtoul( dname + 2, nullptr, 10 ) ); + if( cval == 0 ) + return xmlGetPredefinedEntity(name); + OUString vname(&cval, 1); + xmlEntityPtr entpt + = xmlNewEntity(nullptr, name, XML_INTERNAL_GENERAL_ENTITY, nullptr, nullptr, + BAD_CAST(OUStringToOString(vname, RTL_TEXTENCODING_UTF8).getStr())); + m_TemporalEntities.push_back(entpt); + return entpt; + } + } + return xmlGetPredefinedEntity(name); +} + +FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl) {} + +FastSaxParser::~FastSaxParser() +{ +} + +void SAL_CALL +FastSaxParser::initialize(css::uno::Sequence< css::uno::Any > const& rArguments) +{ + if (!rArguments.hasElements()) + return; + + OUString str; + if ( !(rArguments[0] >>= str) ) + throw IllegalArgumentException(); + + if ( str == "IgnoreMissingNSDecl" ) + mpImpl->m_bIgnoreMissingNSDecl = true; + else if ( str == "DoSmeplease" ) + ; //just ignore as this is already immune to billion laughs + else if ( str == "DisableThreadedParser" ) + mpImpl->m_bDisableThreadedParser = true; + else + throw IllegalArgumentException(); + +} + +void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource ) +{ + mpImpl->parseStream(aInputSource); +} + +void 
FastSaxParser::setFastDocumentHandler( const uno::Reference<xml::sax::XFastDocumentHandler>& Handler ) +{ + mpImpl->setFastDocumentHandler(Handler); +} + +void FastSaxParser::setTokenHandler( const uno::Reference<xml::sax::XFastTokenHandler>& Handler ) +{ + mpImpl->setTokenHandler(Handler); +} + +void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) +{ + mpImpl->registerNamespace(NamespaceURL, NamespaceToken); +} + +OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix ) +{ + return mpImpl->getNamespaceURL(rPrefix); +} + +void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler ) +{ + mpImpl->setErrorHandler(Handler); +} + +void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& ) +{ + // not implemented +} + +void FastSaxParser::setLocale( const lang::Locale& ) +{ + // not implemented +} + +void FastSaxParser::setNamespaceHandler( const uno::Reference< css::xml::sax::XFastNamespaceHandler >& Handler) +{ + mpImpl->setNamespaceHandler(Handler); +} + +OUString FastSaxParser::getImplementationName() +{ + return "com.sun.star.comp.extensions.xml.sax.FastParser"; +} + +void FastSaxParser::setCustomEntityNames( + const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements) +{ + mpImpl->setCustomEntityNames(replacements); +} + +sal_Bool FastSaxParser::supportsService( const OUString& ServiceName ) +{ + return cppu::supportsService(this, ServiceName); +} + +uno::Sequence<OUString> FastSaxParser::getSupportedServiceNames() +{ + return { "com.sun.star.xml.sax.FastParser" }; +} + +} // namespace sax_fastparser + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_comp_extensions_xml_sax_FastParser_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new FastSaxParser); +} + +// 
---------------------------------------------------------- +// copy of the code in xmloff/source/core/namespace.cxx, which adds namespace aliases +// for various dodgy namespace decls in the wild. + +static bool NormalizeW3URI( OUString& rName ); +static bool NormalizeOasisURN( OUString& rName ); + +static void NormalizeURI( OUString& rName ) +{ + // try OASIS + W3 URI normalization + bool bSuccess = NormalizeOasisURN( rName ); + if( ! bSuccess ) + NormalizeW3URI( rName ); +} + +constexpr OUStringLiteral XML_URI_W3_PREFIX(u"http://www.w3.org/"); +constexpr OUStringLiteral XML_URI_XFORMS_SUFFIX(u"/xforms"); +constexpr OUStringLiteral XML_N_XFORMS_1_0(u"http://www.w3.org/2002/xforms"); +constexpr OUStringLiteral XML_N_SVG(u"http://www.w3.org/2000/svg"); +constexpr OUStringLiteral XML_N_SVG_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"); +constexpr OUStringLiteral XML_N_FO(u"http://www.w3.org/1999/XSL/Format"); +constexpr OUStringLiteral XML_N_FO_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"); +constexpr OUStringLiteral XML_N_SMIL(u"http://www.w3.org/2001/SMIL20/"); +constexpr OUStringLiteral XML_N_SMIL_OLD(u"http://www.w3.org/2001/SMIL20"); +constexpr OUStringLiteral XML_N_SMIL_COMPAT(u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"); +constexpr OUStringLiteral XML_URN_OASIS_NAMES_TC(u"urn:oasis:names:tc"); +constexpr OUStringLiteral XML_XMLNS(u"xmlns"); +constexpr OUStringLiteral XML_OPENDOCUMENT(u"opendocument"); +constexpr OUStringLiteral XML_1_0(u"1.0"); + +static bool NormalizeW3URI( OUString& rName ) +{ + // check if URI matches: + // http://www.w3.org/[0-9]*/[:letter:]* + // (year)/(WG name) + // For the following WG/standards names: + // - xforms + + bool bSuccess = false; + const OUString& sURIPrefix = XML_URI_W3_PREFIX; + if( rName.startsWith( sURIPrefix ) ) + { + const OUString& sURISuffix = XML_URI_XFORMS_SUFFIX ; + sal_Int32 nCompareFrom = rName.getLength() - sURISuffix.getLength(); + if( 
rName.subView( nCompareFrom ) == sURISuffix ) + { + // found W3 prefix, and xforms suffix + rName = XML_N_XFORMS_1_0; + bSuccess = true; + } + } + return bSuccess; +} + +static bool NormalizeOasisURN( OUString& rName ) +{ + // #i38644# + // we exported the wrong namespace for smil, so we correct this here on load + // for older documents + if( rName == XML_N_SVG ) + { + rName = XML_N_SVG_COMPAT; + return true; + } + else if( rName == XML_N_FO ) + { + rName = XML_N_FO_COMPAT; + return true; + } + else if( rName == XML_N_SMIL || rName == XML_N_SMIL_OLD ) + { + rName = XML_N_SMIL_COMPAT; + return true; + } + + + // Check if URN matches + // :urn:oasis:names:tc:[^:]*:xmlns:[^:]*:1.[^:]* + // |---| |---| |-----| + // TC-Id Sub-Id Version + + sal_Int32 nNameLen = rName.getLength(); + // :urn:oasis:names:tc.* + const OUString& rOasisURN = XML_URN_OASIS_NAMES_TC; + if( !rName.startsWith( rOasisURN ) ) + return false; + + // :urn:oasis:names:tc:.* + sal_Int32 nPos = rOasisURN.getLength(); + if( nPos >= nNameLen || rName[nPos] != ':' ) + return false; + + // :urn:oasis:names:tc:[^:]:.* + sal_Int32 nTCIdStart = nPos+1; + sal_Int32 nTCIdEnd = rName.indexOf( ':', nTCIdStart ); + if( -1 == nTCIdEnd ) + return false; + + // :urn:oasis:names:tc:[^:]:xmlns.* + nPos = nTCIdEnd + 1; + std::u16string_view sTmp( rName.subView( nPos ) ); + const OUString& rXMLNS = XML_XMLNS; + if( !o3tl::starts_with(sTmp, rXMLNS ) ) + return false; + + // :urn:oasis:names:tc:[^:]:xmlns:.* + nPos += rXMLNS.getLength(); + if( nPos >= nNameLen || rName[nPos] != ':' ) + return false; + + // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:.* + nPos = rName.indexOf( ':', nPos+1 ); + if( -1 == nPos ) + return false; + + // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:[^:][^:][^:][^:]* + sal_Int32 nVersionStart = nPos+1; + if( nVersionStart+2 >= nNameLen || + -1 != rName.indexOf( ':', nVersionStart ) ) + return false; + + // :urn:oasis:names:tc:[^:]:xmlns:[^:]*:1\.[^:][^:]* + if( rName[nVersionStart] != '1' || 
rName[nVersionStart+1] != '.' ) + return false; + + // replace [tcid] with current TCID and version with current version. + + rName = rName.subView( 0, nTCIdStart ) + + XML_OPENDOCUMENT + + rName.subView( nTCIdEnd, nVersionStart-nTCIdEnd ) + + XML_1_0; + + return true; +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/fastparser/legacyfastparser.cxx b/sax/source/fastparser/legacyfastparser.cxx new file mode 100644 index 0000000000..e7afc55d93 --- /dev/null +++ b/sax/source/fastparser/legacyfastparser.cxx @@ -0,0 +1,375 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/FastParser.hpp> +#include <com/sun/star/lang/XInitialization.hpp> +#include <com/sun/star/beans/Pair.hpp> +#include <comphelper/attributelist.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <comphelper/processfactory.hxx> +#include <rtl/ref.hxx> +#include <memory> +#include <utility> +#include <vector> + +using namespace ::cppu; +using namespace css; +using namespace uno; +using namespace lang; +using namespace xml::sax; +using namespace io; + +namespace { + +class NamespaceHandler : public WeakImplHelper< XFastNamespaceHandler > +{ +private: + struct NamespaceDefine + { + OUString m_aPrefix; + OUString m_aNamespaceURI; + + NamespaceDefine( OUString aPrefix, OUString aNamespaceURI ) : m_aPrefix(std::move( aPrefix )), m_aNamespaceURI(std::move( aNamespaceURI )) {} + }; + std::vector< std::unique_ptr< NamespaceDefine > > m_aNamespaceDefines; + +public: + NamespaceHandler(); + void addNSDeclAttributes( rtl::Reference < comphelper::AttributeList > const & rAttrList ); + + //XFastNamespaceHandler + virtual void SAL_CALL registerNamespace( const OUString& rNamespacePrefix, const OUString& rNamespaceURI ) override; + virtual OUString SAL_CALL getNamespaceURI( const OUString& rNamespacePrefix ) override; +}; + +NamespaceHandler::NamespaceHandler() +{ +} + +void NamespaceHandler::addNSDeclAttributes( rtl::Reference < comphelper::AttributeList > const & rAttrList ) +{ + for(const auto& aNamespaceDefine : m_aNamespaceDefines) + { + OUString& rPrefix = aNamespaceDefine->m_aPrefix; + OUString& rNamespaceURI = aNamespaceDefine->m_aNamespaceURI; + OUString sDecl; + if ( rPrefix.isEmpty() ) + sDecl = "xmlns"; + else + sDecl = "xmlns:" + rPrefix; + rAttrList->AddAttribute( sDecl, rNamespaceURI ); + } + m_aNamespaceDefines.clear(); +} + +void NamespaceHandler::registerNamespace( const OUString& rNamespacePrefix, const OUString& rNamespaceURI ) 
+{ + m_aNamespaceDefines.push_back( std::make_unique<NamespaceDefine>( + rNamespacePrefix, rNamespaceURI) ); +} + +OUString NamespaceHandler::getNamespaceURI( const OUString&/* rNamespacePrefix */ ) +{ + return OUString(); +} + +class SaxLegacyFastParser : public WeakImplHelper< XInitialization, XServiceInfo, XParser > +{ +private: + rtl::Reference< NamespaceHandler > m_aNamespaceHandler; +public: + SaxLegacyFastParser(); + +// css::lang::XInitialization: + virtual void SAL_CALL initialize(css::uno::Sequence<css::uno::Any> const& rArguments) override; + +// The SAX-Parser-Interface + virtual void SAL_CALL parseStream( const InputSource& structSource) override; + virtual void SAL_CALL setDocumentHandler(const Reference< XDocumentHandler > & xHandler) override; + virtual void SAL_CALL setErrorHandler(const Reference< XErrorHandler > & xHandler) override; + virtual void SAL_CALL setDTDHandler(const Reference < XDTDHandler > & xHandler) override; + virtual void SAL_CALL setEntityResolver(const Reference< XEntityResolver >& xResolver) override; + virtual void SAL_CALL setLocale( const Locale &locale ) override; + +// XServiceInfo + OUString SAL_CALL getImplementationName() override; + Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; + +private: + Reference< XFastParser > m_xParser; + Reference< XDocumentHandler > m_xDocumentHandler; + Reference< XFastTokenHandler > m_xTokenHandler; + +}; + + +class CallbackDocumentHandler : public WeakImplHelper< XFastDocumentHandler > +{ +private: + Reference< XDocumentHandler > m_xDocumentHandler; + Reference< XFastTokenHandler > m_xTokenHandler; + rtl::Reference< NamespaceHandler > m_aNamespaceHandler; + OUString getNamespacePrefixFromToken( sal_Int32 nToken ); + OUString getNameFromToken( sal_Int32 nToken ); + + static constexpr OUString aDefaultNamespace = u""_ustr; + static constexpr OUString aNamespaceSeparator = u":"_ustr; + +public: + 
CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, + rtl::Reference< NamespaceHandler > const & rNamespaceHandler, + Reference< XFastTokenHandler > const & xTokenHandler); + + // XFastDocumentHandler + virtual void SAL_CALL startDocument() override; + virtual void SAL_CALL endDocument() override; + virtual void SAL_CALL processingInstruction( const OUString& rTarget, const OUString& rData ) override; + virtual void SAL_CALL setDocumentLocator( const Reference< XLocator >& xLocator ) override; + + // XFastContextHandler + virtual void SAL_CALL startFastElement( sal_Int32 nElement, const Reference< XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL startUnknownElement( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL endFastElement( sal_Int32 Element ) override; + virtual void SAL_CALL endUnknownElement( const OUString& Namespace, const OUString& Name ) override; + virtual Reference< XFastContextHandler > SAL_CALL createFastChildContext( sal_Int32 nElement, const Reference< XFastAttributeList >& Attribs ) override; + virtual Reference< XFastContextHandler > SAL_CALL createUnknownChildContext( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL characters( const OUString& aChars ) override; + +}; + +OUString CallbackDocumentHandler::getNamespacePrefixFromToken( sal_Int32 nToken ) +{ + if ( ( nToken & 0xffff0000 ) != 0 ) + { + Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff0000 ); + return OUString( reinterpret_cast< const char* >( + aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 ); + } + else + return OUString(); +} + +OUString CallbackDocumentHandler::getNameFromToken( sal_Int32 nToken ) +{ + Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff ); + return OUString( reinterpret_cast< 
const char* >( + aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 ); +} + +CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, + rtl::Reference< NamespaceHandler > const & rNamespaceHandler, + Reference< XFastTokenHandler > const & xTokenHandler) +{ + m_xDocumentHandler.set( xDocumentHandler ); + m_aNamespaceHandler = rNamespaceHandler; + m_xTokenHandler.set( xTokenHandler ); +} + +void SAL_CALL CallbackDocumentHandler::startDocument() +{ + if ( m_xDocumentHandler.is() ) + m_xDocumentHandler->startDocument(); +} + +void SAL_CALL CallbackDocumentHandler::endDocument() +{ + if ( m_xDocumentHandler.is() ) + m_xDocumentHandler->endDocument(); +} + +void SAL_CALL CallbackDocumentHandler::processingInstruction( const OUString& rTarget, const OUString& rData ) +{ + if ( m_xDocumentHandler.is() ) + m_xDocumentHandler->processingInstruction( rTarget, rData ); +} + +void SAL_CALL CallbackDocumentHandler::setDocumentLocator( const Reference< XLocator >& xLocator ) +{ + if ( m_xDocumentHandler.is() ) + m_xDocumentHandler->setDocumentLocator( xLocator ); +} + +void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32 nElement , const Reference< XFastAttributeList >& Attribs ) +{ + const OUString& rPrefix = CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ); + const OUString& rLocalName = CallbackDocumentHandler::getNameFromToken( nElement ); + startUnknownElement( aDefaultNamespace, (rPrefix.isEmpty())? 
rLocalName : rPrefix + aNamespaceSeparator + rLocalName, Attribs ); +} + +void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& /*Namespace*/, const OUString& Name, const Reference< XFastAttributeList >& Attribs ) +{ + if ( !m_xDocumentHandler.is() ) + return; + + rtl::Reference < comphelper::AttributeList > rAttrList = new comphelper::AttributeList; + m_aNamespaceHandler->addNSDeclAttributes( rAttrList ); + + const Sequence< xml::FastAttribute > fastAttribs = Attribs->getFastAttributes(); + for (const auto& rAttr : fastAttribs) + { + const OUString& rAttrValue = rAttr.Value; + sal_Int32 nToken = rAttr.Token; + const OUString& rAttrNamespacePrefix = CallbackDocumentHandler::getNamespacePrefixFromToken( nToken ); + OUString sAttrName = CallbackDocumentHandler::getNameFromToken( nToken ); + if ( !rAttrNamespacePrefix.isEmpty() ) + sAttrName = rAttrNamespacePrefix + aNamespaceSeparator + sAttrName; + + rAttrList->AddAttribute( sAttrName, rAttrValue ); + } + + const Sequence< xml::Attribute > unknownAttribs = Attribs->getUnknownAttributes(); + for (const auto& rAttr : unknownAttribs) + { + const OUString& rAttrValue = rAttr.Value; + const OUString& rAttrName = rAttr.Name; + + rAttrList->AddAttribute( rAttrName, rAttrValue ); + } + m_xDocumentHandler->startElement( Name, rAttrList ); +} + +void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32 nElement ) +{ + const OUString& rPrefix = CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ); + const OUString& rLocalName = CallbackDocumentHandler::getNameFromToken( nElement ); + endUnknownElement( aDefaultNamespace, (rPrefix.isEmpty())? 
rLocalName : rPrefix + aNamespaceSeparator + rLocalName ); +} + + +void SAL_CALL CallbackDocumentHandler::endUnknownElement( const OUString& /*Namespace*/, const OUString& Name ) +{ + if ( m_xDocumentHandler.is() ) + { + m_xDocumentHandler->endElement( Name ); + } +} + +Reference< XFastContextHandler > SAL_CALL CallbackDocumentHandler::createFastChildContext( sal_Int32/* nElement */, const Reference< XFastAttributeList >&/* Attribs */ ) +{ + return this; +} + + +Reference< XFastContextHandler > SAL_CALL CallbackDocumentHandler::createUnknownChildContext( const OUString&/* Namespace */, const OUString&/* Name */, const Reference< XFastAttributeList >&/* Attribs */ ) +{ + return this; +} + +void SAL_CALL CallbackDocumentHandler::characters( const OUString& aChars ) +{ + if ( m_xDocumentHandler.is() ) + m_xDocumentHandler->characters( aChars ); +} + +SaxLegacyFastParser::SaxLegacyFastParser( ) : m_aNamespaceHandler( new NamespaceHandler ), + m_xParser(FastParser::create(::comphelper::getProcessComponentContext() )) +{ + m_xParser->setNamespaceHandler( m_aNamespaceHandler ); +} + +void SAL_CALL SaxLegacyFastParser::initialize(Sequence< Any > const& rArguments ) +{ + if (!rArguments.hasElements()) + return; + + Reference< XFastTokenHandler > xTokenHandler; + OUString str; + if ( ( rArguments[0] >>= xTokenHandler ) && xTokenHandler.is() ) + { + m_xTokenHandler.set( xTokenHandler ); + } + else if ( ( rArguments[0] >>= str ) && "registerNamespaces" == str ) + { + css::beans::Pair< OUString, sal_Int32 > rPair; + for (sal_Int32 i = 1; i < rArguments.getLength(); i++ ) + { + rArguments[i] >>= rPair; + m_xParser->registerNamespace( rPair.First, rPair.Second ); + } + } + else + { + uno::Reference<lang::XInitialization> const xInit(m_xParser, + uno::UNO_QUERY_THROW); + xInit->initialize( rArguments ); + } +} + +void SaxLegacyFastParser::parseStream( const InputSource& structSource ) +{ + m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler, + 
m_aNamespaceHandler, m_xTokenHandler ) ); + m_xParser->setTokenHandler( m_xTokenHandler ); + m_xParser->parseStream( structSource ); +} + +void SaxLegacyFastParser::setDocumentHandler( const Reference< XDocumentHandler > & xHandler ) +{ + m_xDocumentHandler = xHandler; +} + +void SaxLegacyFastParser::setErrorHandler( const Reference< XErrorHandler > & xHandler ) +{ + m_xParser->setErrorHandler( xHandler ); +} + +void SaxLegacyFastParser::setDTDHandler( const Reference < XDTDHandler > &/* xHandler */ ) +{ + +} + +void SaxLegacyFastParser::setEntityResolver( const Reference< XEntityResolver >& xResolver ) +{ + m_xParser->setEntityResolver( xResolver ); +} + +void SaxLegacyFastParser::setLocale( const Locale &locale ) +{ + m_xParser->setLocale( locale ); +} + +OUString SaxLegacyFastParser::getImplementationName() +{ + return "com.sun.star.comp.extensions.xml.sax.LegacyFastParser"; +} + +sal_Bool SaxLegacyFastParser::supportsService(const OUString& ServiceName) +{ + return cppu::supportsService(this, ServiceName); +} + +Sequence< OUString > SaxLegacyFastParser::getSupportedServiceNames() +{ + return { "com.sun.star.xml.sax.LegacyFastParser" }; +} + +} //namespace + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_comp_extensions_xml_sax_LegacyFastParser_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new SaxLegacyFastParser); +} + + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx new file mode 100644 index 0000000000..7d9e514c34 --- /dev/null +++ b/sax/source/tools/CachedOutputStream.hxx @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX +#define INCLUDED_SAX_SOURCE_TOOLS_CACHEDOUTPUTSTREAM_HXX + +#include <sal/types.h> + +#include <com/sun/star/io/XOutputStream.hpp> +#include <com/sun/star/uno/Sequence.hxx> + +#include <cstring> +#include <memory> + +namespace sax_fastparser { + +class ForMergeBase +{ +public: + virtual ~ForMergeBase() {} + virtual void append( const css::uno::Sequence<sal_Int8>& rWhat ) = 0; +}; + +class CachedOutputStream +{ + /// When buffer hits this size, it's written to mxOutputStream + static const sal_Int32 mnMaximumSize = 0x100000; // 1Mbyte + + /// ForMerge structure is used for sorting elements in Writer + std::shared_ptr< ForMergeBase > mpForMerge; + const css::uno::Sequence<sal_Int8> mpCache; + /// Output stream, usually writing data into files. + css::uno::Reference< css::io::XOutputStream > mxOutputStream; + uno_Sequence *pSeq; + sal_Int32 mnCacheWrittenSize; + bool mbWriteToOutStream; + +public: + CachedOutputStream() : mpCache(mnMaximumSize) + , pSeq(mpCache.get()) + , mnCacheWrittenSize(0) + , mbWriteToOutStream(true) + {} + + const css::uno::Reference< css::io::XOutputStream >& getOutputStream() const + { + return mxOutputStream; + } + + void setOutputStream( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) + { + mxOutputStream = xOutputStream; + } + + void setOutput( std::shared_ptr< ForMergeBase > pForMerge ) + { + flush(); + mbWriteToOutStream = false; + mpForMerge = pForMerge; + } + + void resetOutputToStream() + { + flush(); + mbWriteToOutStream = true; + mpForMerge.reset(); + } + + /// cache string and if limit is hit, flush + void writeBytes( const sal_Int8* pStr, sal_Int32 nLen ) + { + // Write when the buffer gets big enough + if (mnCacheWrittenSize + nLen > mnMaximumSize) + { + flush(); + + // Writer does some elements sorting, so it can accumulate + // 
pretty big strings in FastSaxSerializer::ForMerge. + // In that case, just flush data and write immediately. + if (nLen > mnMaximumSize) + { + if (mbWriteToOutStream) + mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) ); + else + mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) ); + return; + } + } + + memcpy(pSeq->elements + mnCacheWrittenSize, pStr, nLen); + mnCacheWrittenSize += nLen; + } + + /// immediately write buffer into mxOutputStream and clear + void flush() + { + // resize the Sequence to written size + pSeq->nElements = mnCacheWrittenSize; + if (mbWriteToOutStream) + mxOutputStream->writeBytes( mpCache ); + else + mpForMerge->append( mpCache ); + // and next time write to the beginning + mnCacheWrittenSize = 0; + } +}; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx new file mode 100644 index 0000000000..818d04a9bd --- /dev/null +++ b/sax/source/tools/converter.cxx @@ -0,0 +1,2360 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sax/tools/converter.hxx> + +#include <com/sun/star/i18n/UnicodeType.hpp> +#include <com/sun/star/util/DateTime.hpp> +#include <com/sun/star/util/Date.hpp> +#include <com/sun/star/util/Duration.hpp> +#include <com/sun/star/util/Time.hpp> +#include <optional> + +#include <rtl/ustrbuf.hxx> +#include <rtl/math.hxx> +#include <rtl/character.hxx> +#include <sal/log.hxx> +#include <o3tl/string_view.hxx> +#include <o3tl/typed_flags_set.hxx> +#include <o3tl/unit_conversion.hxx> +#include <osl/diagnose.h> +#include <tools/long.hxx> + +#include <algorithm> +#include <string_view> + +using namespace com::sun::star; +using namespace com::sun::star::uno; +using namespace com::sun::star::util; +using namespace ::com::sun::star::i18n; + + +namespace sax { + +const std::string_view gpsMM = "mm"; +const std::string_view gpsCM = "cm"; +const std::string_view gpsPT = "pt"; +const std::string_view gpsINCH = "in"; +const std::string_view gpsPC = "pc"; + +const sal_Int8 XML_MAXDIGITSCOUNT_TIME = 14; + +static sal_Int64 toInt64_WithLength(const sal_Unicode * str, sal_Int16 radix, sal_Int32 nStrLength ) +{ + return rtl_ustr_toInt64_WithLength(str, radix, nStrLength); +} +static sal_Int64 toInt64_WithLength(const char * str, sal_Int16 radix, sal_Int32 nStrLength ) +{ + return rtl_str_toInt64_WithLength(str, radix, nStrLength); +} + +namespace +{ +o3tl::Length Measure2O3tlUnit(sal_Int16 nUnit) +{ + switch (nUnit) + { + case MeasureUnit::TWIP: + return o3tl::Length::twip; + case MeasureUnit::POINT: + return o3tl::Length::pt; + case MeasureUnit::MM_10TH: + return o3tl::Length::mm10; + case MeasureUnit::MM_100TH: + return o3tl::Length::mm100; + case MeasureUnit::MM: + return o3tl::Length::mm; + case MeasureUnit::CM: + return o3tl::Length::cm; + default: + SAL_WARN("sax", "unit not supported for length"); + [[fallthrough]]; + case MeasureUnit::INCH: + return o3tl::Length::in; + } +} + +std::string_view Measure2UnitString(sal_Int16 nUnit) +{ + switch (nUnit) + { + case 
MeasureUnit::TWIP: + return gpsPC; // ?? + case MeasureUnit::POINT: + return gpsPT; + case MeasureUnit::MM_10TH: + case MeasureUnit::MM_100TH: + return {}; + case MeasureUnit::MM: + return gpsMM; + case MeasureUnit::CM: + return gpsCM; + case MeasureUnit::INCH: + default: + return gpsINCH; + } +} + +template <typename V> bool wordEndsWith(V string, std::string_view expected) +{ + V substr = string.substr(0, expected.size()); + return std::equal(substr.begin(), substr.end(), expected.begin(), expected.end(), + [](sal_uInt32 c1, sal_uInt32 c2) { return rtl::toAsciiLowerCase(c1) == c2; }) + && (string.size() == expected.size() || string[expected.size()] == ' '); +} + +} + +/** convert string to measure using optional min and max values*/ +template<typename V> +static bool lcl_convertMeasure( sal_Int32& rValue, + V rString, + sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */, + sal_Int32 nMin /* = SAL_MIN_INT32 */, + sal_Int32 nMax /* = SAL_MAX_INT32 */ ) +{ + bool bNeg = false; + double nVal = 0; + + sal_Int32 nPos = 0; + sal_Int32 const nLen = rString.size(); + + // skip white space + while( (nPos < nLen) && (rString[nPos] <= ' ') ) + nPos++; + + if( nPos < nLen && '-' == rString[nPos] ) + { + bNeg = true; + nPos++; + } + + // get number + while( nPos < nLen && + '0' <= rString[nPos] && + '9' >= rString[nPos] ) + { + // TODO: check overflow! + nVal *= 10; + nVal += (rString[nPos] - '0'); + nPos++; + } + if( nPos < nLen && '.' == rString[nPos] ) + { + nPos++; + double nDiv = 1.; + + while( nPos < nLen && + '0' <= rString[nPos] && + '9' >= rString[nPos] ) + { + // TODO: check overflow! 
+ nDiv *= 10; + nVal += ( static_cast<double>(rString[nPos] - '0') / nDiv ); + nPos++; + } + } + + // skip white space + while( (nPos < nLen) && (rString[nPos] <= ' ') ) + nPos++; + + if( nPos < nLen ) + { + + if( MeasureUnit::PERCENT == nTargetUnit ) + { + if( '%' != rString[nPos] ) + return false; + } + else if( MeasureUnit::PIXEL == nTargetUnit ) + { + if( nPos + 1 >= nLen || + ('p' != rString[nPos] && + 'P' != rString[nPos])|| + ('x' != rString[nPos+1] && + 'X' != rString[nPos+1]) ) + return false; + } + else + { + OSL_ENSURE( MeasureUnit::TWIP == nTargetUnit || MeasureUnit::POINT == nTargetUnit || + MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit || + MeasureUnit::PIXEL == nTargetUnit, "unit is not supported"); + + o3tl::Length eFrom = o3tl::Length::invalid; + + if( MeasureUnit::TWIP == nTargetUnit ) + { + switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos])) + { + case u'c': + if (wordEndsWith(rString.substr(nPos + 1), "m")) + eFrom = o3tl::Length::cm; + break; + case u'i': + if (wordEndsWith(rString.substr(nPos + 1), "n")) + eFrom = o3tl::Length::in; + break; + case u'm': + if (wordEndsWith(rString.substr(nPos + 1), "m")) + eFrom = o3tl::Length::mm; + break; + case u'p': + if (wordEndsWith(rString.substr(nPos + 1), "t")) + eFrom = o3tl::Length::pt; + else if (wordEndsWith(rString.substr(nPos + 1), "c")) + eFrom = o3tl::Length::pc; + break; + } + } + else if( MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit ) + { + switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos])) + { + case u'c': + if (wordEndsWith(rString.substr(nPos + 1), "m")) + eFrom = o3tl::Length::cm; + break; + case u'i': + if (wordEndsWith(rString.substr(nPos + 1), "n")) + eFrom = o3tl::Length::in; + break; + case u'm': + if (wordEndsWith(rString.substr(nPos + 1), "m")) + eFrom = o3tl::Length::mm; + break; + case u'p': + if (wordEndsWith(rString.substr(nPos + 1), "t")) + eFrom = o3tl::Length::pt; + else if 
(wordEndsWith(rString.substr(nPos + 1), "c")) + eFrom = o3tl::Length::pc; + else if (wordEndsWith(rString.substr(nPos + 1), "x")) + eFrom = o3tl::Length::px; + break; + } + } + else if( MeasureUnit::POINT == nTargetUnit ) + { + if (wordEndsWith(rString.substr(nPos), "pt")) + eFrom = o3tl::Length::pt; + } + + if (eFrom == o3tl::Length::invalid) + return false; + + // TODO: check overflow + nVal = o3tl::convert(nVal, eFrom, Measure2O3tlUnit(nTargetUnit)); + } + } + + nVal += .5; + if( bNeg ) + nVal = -nVal; + + if( nVal <= static_cast<double>(nMin) ) + rValue = nMin; + else if( nVal >= static_cast<double>(nMax) ) + rValue = nMax; + else + rValue = static_cast<sal_Int32>(nVal); + + return true; +} + +/** convert string to measure using optional min and max values*/ +bool Converter::convertMeasure( sal_Int32& rValue, + std::u16string_view rString, + sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */, + sal_Int32 nMin /* = SAL_MIN_INT32 */, + sal_Int32 nMax /* = SAL_MAX_INT32 */ ) +{ + return lcl_convertMeasure(rValue, rString, nTargetUnit, nMin, nMax); +} + +/** convert string to measure using optional min and max values*/ +bool Converter::convertMeasure( sal_Int32& rValue, + std::string_view rString, + sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */, + sal_Int32 nMin /* = SAL_MIN_INT32 */, + sal_Int32 nMax /* = SAL_MAX_INT32 */ ) +{ + return lcl_convertMeasure(rValue, rString, nTargetUnit, nMin, nMax); +} + + +/** convert measure in given unit to string with given unit */ +void Converter::convertMeasure( OUStringBuffer& rBuffer, + sal_Int32 nMeasure, + sal_Int16 nSourceUnit /* = MeasureUnit::MM_100TH */, + sal_Int16 nTargetUnit /* = MeasureUnit::INCH */ ) +{ + if( nSourceUnit == MeasureUnit::PERCENT ) + { + OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT, + "MeasureUnit::PERCENT only maps to MeasureUnit::PERCENT!" 
); + + rBuffer.append( nMeasure ); + rBuffer.append( '%' ); + + return; + } + sal_Int64 nValue(nMeasure); // extend to 64-bit first to avoid overflow + // the sign is processed separately + if (nValue < 0) + { + nValue = -nValue; + rBuffer.append( '-' ); + } + + o3tl::Length eFrom = o3tl::Length::in, eTo = o3tl::Length::in; + int nFac = 100; // used to get specific number of decimals (2 by default) + std::string_view psUnit; + switch( nSourceUnit ) + { + case MeasureUnit::TWIP: + eFrom = o3tl::Length::twip; + switch( nTargetUnit ) + { + case MeasureUnit::MM_100TH: + case MeasureUnit::MM_10TH: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit,"output unit not supported for twip values" ); + [[fallthrough]]; + case MeasureUnit::MM: + eTo = o3tl::Length::mm; + nFac = 100; + psUnit = gpsMM; + break; + + case MeasureUnit::CM: + eTo = o3tl::Length::cm; + nFac = 1000; + psUnit = gpsCM; + break; + + case MeasureUnit::POINT: + eTo = o3tl::Length::pt; + nFac = 100; + psUnit = gpsPT; + break; + + case MeasureUnit::INCH: + default: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, + "output unit not supported for twip values" ); + nFac = 10000; + psUnit = gpsINCH; + break; + } + break; + + case MeasureUnit::POINT: + // 1pt = 1pt (exactly) + OSL_ENSURE( MeasureUnit::POINT == nTargetUnit, + "output unit not supported for pt values" ); + eFrom = eTo = o3tl::Length::pt; + nFac = 1; + psUnit = gpsPT; + break; + case MeasureUnit::MM_10TH: + case MeasureUnit::MM_100TH: + { + int nFac2 = (MeasureUnit::MM_100TH == nSourceUnit) ? 
100 : 10; + eFrom = Measure2O3tlUnit(nSourceUnit); + switch( nTargetUnit ) + { + case MeasureUnit::MM_100TH: + case MeasureUnit::MM_10TH: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, + "output unit not supported for 1/100mm values" ); + [[fallthrough]]; + case MeasureUnit::MM: + eTo = o3tl::Length::mm; + nFac = nFac2; + psUnit = gpsMM; + break; + + case MeasureUnit::CM: + eTo = o3tl::Length::cm; + nFac = 10*nFac2; + psUnit = gpsCM; + break; + + case MeasureUnit::POINT: + eTo = o3tl::Length::pt; + nFac = nFac2; + psUnit = gpsPT; + break; + + case MeasureUnit::INCH: + default: + OSL_ENSURE( MeasureUnit::INCH == nTargetUnit, + "output unit not supported for 1/100mm values" ); + nFac = 100*nFac2; + psUnit = gpsINCH; + break; + } + break; + } + default: + OSL_ENSURE(false, "sax::Converter::convertMeasure(): " + "source unit not supported"); + break; + } + + nValue = o3tl::convert(nValue * nFac, eFrom, eTo); + + rBuffer.append( static_cast<sal_Int64>(nValue / nFac) ); + if (nFac > 1 && (nValue % nFac) != 0) + { + rBuffer.append( '.' 
); + while (nFac > 1 && (nValue % nFac) != 0) + { + nFac /= 10; + rBuffer.append( static_cast<sal_Int32>((nValue / nFac) % 10) ); + } + } + + if (psUnit.length() > 0) + rBuffer.appendAscii(psUnit.data(), psUnit.length()); +} + +/** convert string to boolean */ +bool Converter::convertBool( bool& rBool, std::u16string_view rString ) +{ + rBool = rString == u"true"; + + return rBool || (rString == u"false"); +} + +/** convert string to boolean */ +bool Converter::convertBool( bool& rBool, std::string_view rString ) +{ + rBool = rString == "true"; + + return rBool || (rString == "false"); +} + +/** convert boolean to string */ +void Converter::convertBool( OUStringBuffer& rBuffer, bool bValue ) +{ + rBuffer.append( bValue ); +} + +/** convert string to percent */ +bool Converter::convertPercent( sal_Int32& rPercent, std::u16string_view rString ) +{ + return convertMeasure( rPercent, rString, MeasureUnit::PERCENT ); +} + +/** convert string to percent */ +bool Converter::convertPercent( sal_Int32& rPercent, std::string_view rString ) +{ + return convertMeasure( rPercent, rString, MeasureUnit::PERCENT ); +} + +/** convert percent to string */ +void Converter::convertPercent( OUStringBuffer& rBuffer, sal_Int32 nValue ) +{ + rBuffer.append( nValue ); + rBuffer.append( '%' ); +} + +/** convert string to pixel measure */ +bool Converter::convertMeasurePx( sal_Int32& rPixel, std::u16string_view rString ) +{ + return convertMeasure( rPixel, rString, MeasureUnit::PIXEL ); +} + +/** convert string to pixel measure */ +bool Converter::convertMeasurePx( sal_Int32& rPixel, std::string_view rString ) +{ + return convertMeasure( rPixel, rString, MeasureUnit::PIXEL ); +} + +/** convert pixel measure to string */ +void Converter::convertMeasurePx( OUStringBuffer& rBuffer, sal_Int32 nValue ) +{ + rBuffer.append( nValue ); + rBuffer.append( 'p' ); + rBuffer.append( 'x' ); +} + +static int lcl_gethex( int nChar ) +{ + if( nChar >= '0' && nChar <= '9' ) + return nChar - '0'; + else if( 
nChar >= 'a' && nChar <= 'f' ) + return nChar - 'a' + 10; + else if( nChar >= 'A' && nChar <= 'F' ) + return nChar - 'A' + 10; + else + return 0; +} + +/** convert string to rgb color */ +template<typename V> +static bool lcl_convertColor( sal_Int32& rColor, V rValue ) +{ + if( rValue.size() != 7 || rValue[0] != '#' ) + return false; + + rColor = lcl_gethex( rValue[1] ) * 16 + lcl_gethex( rValue[2] ); + rColor <<= 8; + + rColor |= lcl_gethex( rValue[3] ) * 16 + lcl_gethex( rValue[4] ); + rColor <<= 8; + + rColor |= lcl_gethex( rValue[5] ) * 16 + lcl_gethex( rValue[6] ); + + return true; +} + +/** convert string to rgb color */ +bool Converter::convertColor( sal_Int32& rColor, std::u16string_view rValue ) +{ + return lcl_convertColor(rColor, rValue); +} + +/** convert string to rgb color */ +bool Converter::convertColor( sal_Int32& rColor, std::string_view rValue ) +{ + return lcl_convertColor(rColor, rValue); +} + +const char aHexTab[] = "0123456789abcdef"; + +/** convert color to string */ +void Converter::convertColor( OUStringBuffer& rBuffer, sal_Int32 nColor ) +{ + rBuffer.append( '#' ); + + sal_uInt8 nCol = static_cast<sal_uInt8>(nColor >> 16); + rBuffer.append( sal_Unicode( aHexTab[ nCol >> 4 ] ) ); + rBuffer.append( sal_Unicode( aHexTab[ nCol & 0xf ] ) ); + + nCol = static_cast<sal_uInt8>(nColor >> 8); + rBuffer.append( sal_Unicode( aHexTab[ nCol >> 4 ] ) ); + rBuffer.append( sal_Unicode( aHexTab[ nCol & 0xf ] ) ); + + nCol = static_cast<sal_uInt8>(nColor); + rBuffer.append( sal_Unicode( aHexTab[ nCol >> 4 ] ) ); + rBuffer.append( sal_Unicode( aHexTab[ nCol & 0xf ] ) ); +} + +/** convert string to number with optional min and max values */ +bool Converter::convertNumber( sal_Int32& rValue, + std::u16string_view aString, + sal_Int32 nMin, sal_Int32 nMax ) +{ + rValue = 0; + sal_Int64 nNumber = 0; + bool bRet = convertNumber64(nNumber,aString,nMin,nMax); + if ( bRet ) + rValue = static_cast<sal_Int32>(nNumber); + return bRet; +} + +/** convert string to number 
with optional min and max values */ +bool Converter::convertNumber( sal_Int32& rValue, + std::string_view aString, + sal_Int32 nMin, sal_Int32 nMax ) +{ + rValue = 0; + sal_Int64 nNumber = 0; + bool bRet = convertNumber64(nNumber,aString,nMin,nMax); + if ( bRet ) + rValue = static_cast<sal_Int32>(nNumber); + return bRet; +} + +/** convert string to 64-bit number with optional min and max values */ +template<typename V> +static bool lcl_convertNumber64( sal_Int64& rValue, + V aString, + sal_Int64 nMin, sal_Int64 nMax ) +{ + sal_Int32 nPos = 0; + sal_Int32 const nLen = aString.size(); + + // skip white space + while( (nPos < nLen) && (aString[nPos] <= ' ') ) + nPos++; + + sal_Int32 nNumberStartPos = nPos; + + if( nPos < nLen && '-' == aString[nPos] ) + { + nPos++; + } + + // get number + while( nPos < nLen && + '0' <= aString[nPos] && + '9' >= aString[nPos] ) + { + nPos++; + } + + rValue = toInt64_WithLength(aString.data() + nNumberStartPos, 10, nPos - nNumberStartPos); + + if( rValue < nMin ) + rValue = nMin; + else if( rValue > nMax ) + rValue = nMax; + + return ( nPos == nLen && rValue >= nMin && rValue <= nMax ); +} + +/** convert string to 64-bit number with optional min and max values */ +bool Converter::convertNumber64( sal_Int64& rValue, + std::u16string_view aString, + sal_Int64 nMin, sal_Int64 nMax ) +{ + return lcl_convertNumber64(rValue, aString, nMin, nMax); +} + +/** convert string to 64-bit number with optional min and max values */ +bool Converter::convertNumber64( sal_Int64& rValue, + std::string_view aString, + sal_Int64 nMin, sal_Int64 nMax ) +{ + return lcl_convertNumber64(rValue, aString, nMin, nMax); +} + + +/** convert double number to string (using ::rtl::math) */ +void Converter::convertDouble( OUStringBuffer& rBuffer, + double fNumber, + bool bWriteUnits, + sal_Int16 nSourceUnit, + sal_Int16 nTargetUnit) +{ + if(MeasureUnit::PERCENT == nSourceUnit) + { + OSL_ENSURE( nTargetUnit == MeasureUnit::PERCENT, "MeasureUnit::PERCENT only maps to 
MeasureUnit::PERCENT!" ); + ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); + if(bWriteUnits) + rBuffer.append('%'); + } + else + { + OUStringBuffer sUnit; + double fFactor = GetConversionFactor(sUnit, nSourceUnit, nTargetUnit); + if(fFactor != 1.0) + fNumber *= fFactor; + ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); + if(bWriteUnits) + rBuffer.append(sUnit); + } +} + +/** convert double number to string (using ::rtl::math) */ +void Converter::convertDouble( OUStringBuffer& rBuffer, double fNumber) +{ + ::rtl::math::doubleToUStringBuffer( rBuffer, fNumber, rtl_math_StringFormat_Automatic, rtl_math_DecimalPlaces_Max, '.', true); +} + +/** convert string to double number (using ::rtl::math) */ +bool Converter::convertDouble(double& rValue, + std::u16string_view rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) +{ + if (!convertDouble(rValue, rString)) + return false; + + OUStringBuffer sUnit; + // fdo#48969: switch source and target because factor is used to divide! + double const fFactor = + GetConversionFactor(sUnit, nTargetUnit, nSourceUnit); + if(fFactor != 1.0 && fFactor != 0.0) + rValue /= fFactor; + return true; +} + +/** convert string to double number (using ::rtl::math) */ +bool Converter::convertDouble(double& rValue, + std::string_view rString, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) +{ + if (!convertDouble(rValue, rString)) + return false; + + OStringBuffer sUnit; + // fdo#48969: switch source and target because factor is used to divide! 
+ double const fFactor = + GetConversionFactor(sUnit, nTargetUnit, nSourceUnit); + if(fFactor != 1.0 && fFactor != 0.0) + rValue /= fFactor; + return true; +} + +/** convert string to double number (using ::rtl::math) */ +bool Converter::convertDouble(double& rValue, std::u16string_view rString) +{ + rtl_math_ConversionStatus eStatus; + rValue = rtl_math_uStringToDouble(rString.data(), + rString.data() + rString.size(), + /*cDecSeparator*/'.', /*cGroupSeparator*/',', + &eStatus, nullptr); + return ( eStatus == rtl_math_ConversionStatus_Ok ); +} + +/** convert string to double number (using ::rtl::math) */ +bool Converter::convertDouble(double& rValue, std::string_view rString) +{ + rtl_math_ConversionStatus eStatus; + rValue = rtl_math_stringToDouble(rString.data(), + rString.data() + rString.size(), + /*cDecSeparator*/'.', /*cGroupSeparator*/',', + &eStatus, nullptr); + return ( eStatus == rtl_math_ConversionStatus_Ok ); +} + +/** convert number, 10th of degrees with range [0..3600] to SVG angle */ +void Converter::convertAngle(OUStringBuffer& rBuffer, sal_Int16 const nAngle, + SvtSaveOptions::ODFSaneDefaultVersion const nVersion) +{ + if (nVersion < SvtSaveOptions::ODFSVER_012 || nVersion == SvtSaveOptions::ODFSVER_012_EXT_COMPAT) + { + // wrong, but backward compatible with OOo/LO < 4.4 + rBuffer.append(static_cast<sal_Int32>(nAngle)); + } + else + { // OFFICE-3774 tdf#89475 write valid ODF 1.2 angle; needs LO 4.4 to import + double fAngle(double(nAngle) / 10.0); + ::sax::Converter::convertDouble(rBuffer, fAngle); + rBuffer.append("deg"); + } +} + +/** convert SVG angle to number, 10th of degrees with range [0..3600] */ +bool Converter::convertAngle(sal_Int16& rAngle, std::u16string_view rString, + bool const isWrongOOo10thDegAngle) +{ + // ODF 1.1 leaves it undefined what the number means, but ODF 1.2 says it's + // degrees, while OOo has historically used 10th of degrees :( + // So import degrees when we see the "deg" suffix but continue with 10th of + // 
degrees for now for the sake of existing OOo/LO documents, until the + // new versions that can read "deg" suffix are widely deployed and we can + // start to write the "deg" suffix. + sal_Int32 nValue(0); + double fValue(0.0); + bool bRet = ::sax::Converter::convertDouble(fValue, rString); + if (std::u16string_view::npos != rString.find(u"deg")) + { + nValue = fValue * 10.0; + } + else if (std::u16string_view::npos != rString.find(u"grad")) + { + nValue = (fValue * 9.0 / 10.0) * 10.0; + } + else if (std::u16string_view::npos != rString.find(u"rad")) + { + nValue = basegfx::rad2deg<10>(fValue); + } + else // no explicit unit + { + if (isWrongOOo10thDegAngle) + { + nValue = fValue; // wrong, but backward compatible with OOo/LO < 7.0 + } + else + { + nValue = fValue * 10.0; // ODF 1.2 + } + } + // limit to valid range [0..3600] + nValue = nValue % 3600; + if (nValue < 0) + { + nValue += 3600; + } + assert(0 <= nValue && nValue <= 3600); + if (bRet) + { + rAngle = sal::static_int_cast<sal_Int16>(nValue); + } + return bRet; +} + +/** convert SVG angle to number, 10th of degrees with range [0..3600] */ +bool Converter::convertAngle(sal_Int16& rAngle, std::string_view rString, + bool const isWrongOOo10thDegAngle) +{ + // ODF 1.1 leaves it undefined what the number means, but ODF 1.2 says it's + // degrees, while OOo has historically used 10th of degrees :( + // So import degrees when we see the "deg" suffix but continue with 10th of + // degrees for now for the sake of existing OOo/LO documents, until the + // new versions that can read "deg" suffix are widely deployed and we can + // start to write the "deg" suffix. 
+ sal_Int32 nValue(0); + double fValue(0.0); + bool bRet = ::sax::Converter::convertDouble(fValue, rString); + if (std::string_view::npos != rString.find("deg")) + { + nValue = fValue * 10.0; + } + else if (std::string_view::npos != rString.find("grad")) + { + nValue = (fValue * 9.0 / 10.0) * 10.0; + } + else if (std::string_view::npos != rString.find("rad")) + { + nValue = basegfx::rad2deg<10>(fValue); + } + else // no explicit unit + { + if (isWrongOOo10thDegAngle) + { + nValue = fValue; // wrong, but backward compatible with OOo/LO < 7.0 + } + else + { + nValue = fValue * 10.0; // ODF 1.2 + } + } + // limit to valid range [0..3600] + nValue = nValue % 3600; + if (nValue < 0) + { + nValue += 3600; + } + assert(0 <= nValue && nValue <= 3600); + if (bRet) + { + rAngle = sal::static_int_cast<sal_Int16>(nValue); + } + return bRet; +} + +/** convert double to ISO "duration" string; negative durations allowed */ +void Converter::convertDuration(OUStringBuffer& rBuffer, + const double fTime) +{ + double fValue = fTime; + + // take care of negative durations as specified in: + // XML Schema, W3C Working Draft 07 April 2000, section 3.2.6.1 + if (fValue < 0.0) + { + rBuffer.append('-'); + fValue = - fValue; + } + + rBuffer.append( "PT" ); + fValue *= 24; + double fHoursValue = ::rtl::math::approxFloor (fValue); + fValue -= fHoursValue; + fValue *= 60; + double fMinsValue = ::rtl::math::approxFloor (fValue); + fValue -= fMinsValue; + fValue *= 60; + double fSecsValue = ::rtl::math::approxFloor (fValue); + fValue -= fSecsValue; + double fNanoSecsValue; + if (fValue > 0.00000000001) + fNanoSecsValue = ::rtl::math::round( fValue, XML_MAXDIGITSCOUNT_TIME - 5); + else + fNanoSecsValue = 0.0; + + if (fNanoSecsValue == 1.0) + { + fNanoSecsValue = 0.0; + fSecsValue += 1.0; + } + if (fSecsValue >= 60.0) + { + fSecsValue -= 60.0; + fMinsValue += 1.0; + } + if (fMinsValue >= 60.0) + { + fMinsValue -= 60.0; + fHoursValue += 1.0; + } + + if (fHoursValue < 10) + rBuffer.append( '0'); + 
rBuffer.append( sal_Int32( fHoursValue)); + rBuffer.append( 'H'); + if (fMinsValue < 10) + rBuffer.append( '0'); + rBuffer.append( sal_Int32( fMinsValue)); + rBuffer.append( 'M'); + if (fSecsValue < 10) + rBuffer.append( '0'); + rBuffer.append( sal_Int32( fSecsValue)); + if (fNanoSecsValue > 0.0) + { + OUString aNS( ::rtl::math::doubleToUString( fValue, + rtl_math_StringFormat_F, XML_MAXDIGITSCOUNT_TIME - 5, '.', + true)); + if ( aNS.getLength() > 2 ) + { + rBuffer.append( '.'); + rBuffer.append( aNS.subView(2) ); // strip "0." + } + } + rBuffer.append( 'S'); +} + +/** helper function of Converter::convertDuration */ +template<typename V> +static bool convertDurationHelper(double& rfTime, V pStr) +{ + // negative time duration? + bool bIsNegativeDuration = false; + if ( '-' == (*pStr) ) + { + bIsNegativeDuration = true; + pStr++; + } + + if ( *pStr != 'P' && *pStr != 'p' ) // duration must start with "P" + return false; + pStr++; + + OUStringBuffer sDoubleStr; + bool bSuccess = true; + bool bDone = false; + bool bTimePart = false; + bool bIsFraction = false; + sal_Int32 nDays = 0; + sal_Int32 nHours = 0; + sal_Int32 nMins = 0; + sal_Int32 nSecs = 0; + sal_Int32 nTemp = 0; + + while ( bSuccess && !bDone ) + { + sal_Unicode c = *(pStr++); + if ( !c ) // end + bDone = true; + else if ( '0' <= c && '9' >= c ) + { + if ( nTemp >= SAL_MAX_INT32 / 10 ) + bSuccess = false; + else + { + if ( !bIsFraction ) + { + nTemp *= 10; + nTemp += (c - u'0'); + } + else + { + sDoubleStr.append(c); + } + } + } + else if ( bTimePart ) + { + if ( c == 'H' || c == 'h' ) + { + nHours = nTemp; + nTemp = 0; + } + else if ( c == 'M' || c == 'm') + { + nMins = nTemp; + nTemp = 0; + } + else if ( (c == ',') || (c == '.') ) + { + nSecs = nTemp; + nTemp = 0; + bIsFraction = true; + sDoubleStr = "0."; + } + else if ( c == 'S' || c == 's' ) + { + if ( !bIsFraction ) + { + nSecs = nTemp; + nTemp = 0; + sDoubleStr = "0.0"; + } + } + else + bSuccess = false; // invalid character + } + else + { + if ( c 
== 'T' || c == 't' ) // "T" starts time part + bTimePart = true; + else if ( c == 'D' || c == 'd') + { + nDays = nTemp; + nTemp = 0; + } + else if ( c == 'Y' || c == 'y' || c == 'M' || c == 'm' ) + { + //! how many days is a year or month? + + OSL_FAIL( "years or months in duration: not implemented"); + bSuccess = false; + } + else + bSuccess = false; // invalid character + } + } + + if ( bSuccess ) + { + if ( nDays ) + nHours += nDays * 24; // add the days to the hours part + double fHour = nHours; + double fMin = nMins; + double fSec = nSecs; + double fFraction = o3tl::toDouble(sDoubleStr); + double fTempTime = fHour / 24; + fTempTime += fMin / (24 * 60); + fTempTime += fSec / (24 * 60 * 60); + fTempTime += fFraction / (24 * 60 * 60); + + // negative duration? + if ( bIsNegativeDuration ) + { + fTempTime = -fTempTime; + } + + rfTime = fTempTime; + } + return bSuccess; +} + +/** convert ISO "duration" string to double; negative durations allowed */ +bool Converter::convertDuration(double& rfTime, + std::string_view rString) +{ + std::string_view aTrimmed = o3tl::trim(rString); + const char* pStr = aTrimmed.data(); + + return convertDurationHelper(rfTime, pStr); +} + +/** convert util::Duration to ISO8601 "duration" string */ +void Converter::convertDuration(OUStringBuffer& rBuffer, + const ::util::Duration& rDuration) +{ + if (rDuration.Negative) + { + rBuffer.append('-'); + } + rBuffer.append('P'); + const bool bHaveDate(rDuration.Years != 0 || + rDuration.Months != 0 || + rDuration.Days != 0); + if (rDuration.Years) + { + rBuffer.append(static_cast<sal_Int32>(rDuration.Years)); + rBuffer.append('Y'); + } + if (rDuration.Months) + { + rBuffer.append(static_cast<sal_Int32>(rDuration.Months)); + rBuffer.append('M'); + } + if (rDuration.Days) + { + rBuffer.append(static_cast<sal_Int32>(rDuration.Days)); + rBuffer.append('D'); + } + if ( rDuration.Hours != 0 + || rDuration.Minutes != 0 + || rDuration.Seconds != 0 + || rDuration.NanoSeconds != 0 ) + { + 
rBuffer.append('T'); // time separator + if (rDuration.Hours) + { + rBuffer.append(static_cast<sal_Int32>(rDuration.Hours)); + rBuffer.append('H'); + } + if (rDuration.Minutes) + { + rBuffer.append(static_cast<sal_Int32>(rDuration.Minutes)); + rBuffer.append('M'); + } + if (rDuration.Seconds != 0 || rDuration.NanoSeconds != 0) + { + // seconds must not be omitted (i.e. ".42S" is not valid) + rBuffer.append(static_cast<sal_Int32>(rDuration.Seconds)); + if (rDuration.NanoSeconds) + { + OSL_ENSURE(rDuration.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999"); + rBuffer.append('.'); + std::ostringstream ostr; + ostr.fill('0'); + ostr.width(9); + ostr << rDuration.NanoSeconds; + rBuffer.appendAscii(ostr.str().c_str()); + } + rBuffer.append('S'); + } + } + else if (!bHaveDate) + { + // zero duration: XMLSchema-2 says there must be at least one component + rBuffer.append('0'); + rBuffer.append('D'); + } +} + +namespace { + +enum Result { R_NOTHING, R_OVERFLOW, R_SUCCESS }; + +} + +template <typename V> +static Result +readUnsignedNumber(V rString, + size_t & io_rnPos, sal_Int32 & o_rNumber) +{ + size_t nPos(io_rnPos); + + while (nPos < rString.size()) + { + const typename V::value_type c = rString[nPos]; + if (('0' > c) || (c > '9')) + break; + ++nPos; + } + + if (io_rnPos == nPos) // read something? + { + o_rNumber = -1; + return R_NOTHING; + } + + const sal_Int64 nTemp = toInt64_WithLength(rString.data() + io_rnPos, 10, nPos - io_rnPos); + + const bool bOverflow = (nTemp >= SAL_MAX_INT32); + + io_rnPos = nPos; + o_rNumber = nTemp; + return bOverflow ? 
R_OVERFLOW : R_SUCCESS; +} + +template<typename V> +static Result +readUnsignedNumberMaxDigits(int maxDigits, + V rString, size_t & io_rnPos, + sal_Int32 & o_rNumber) +{ + bool bOverflow(false); + sal_Int64 nTemp(0); + size_t nPos(io_rnPos); + OSL_ENSURE(maxDigits >= 0, "negative amount of digits makes no sense"); + + while (nPos < rString.size()) + { + const sal_Unicode c = rString[nPos]; + if (('0' <= c) && (c <= '9')) + { + if (maxDigits > 0) + { + nTemp *= 10; + nTemp += (c - u'0'); + if (nTemp >= SAL_MAX_INT32) + { + bOverflow = true; + } + --maxDigits; + } + } + else + { + break; + } + ++nPos; + } + + if (io_rnPos == nPos) // read something? + { + o_rNumber = -1; + return R_NOTHING; + } + + io_rnPos = nPos; + o_rNumber = nTemp; + return bOverflow ? R_OVERFLOW : R_SUCCESS; +} + +template<typename V> +static bool +readDurationT(V rString, size_t & io_rnPos) +{ + if ((io_rnPos < rString.size()) && + (rString[io_rnPos] == 'T' || rString[io_rnPos] == 't')) + { + ++io_rnPos; + return true; + } + return false; +} + +template<typename V> +static bool +readDurationComponent(V rString, + size_t & io_rnPos, sal_Int32 & io_rnTemp, bool & io_rbTimePart, + sal_Int32 & o_rnTarget, const sal_Unicode cLower, const sal_Unicode cUpper) +{ + if (io_rnPos < rString.size()) + { + if (cLower == rString[io_rnPos] || cUpper == rString[io_rnPos]) + { + ++io_rnPos; + if (-1 != io_rnTemp) + { + o_rnTarget = io_rnTemp; + io_rnTemp = -1; + if (!io_rbTimePart) + { + io_rbTimePart = readDurationT(rString, io_rnPos); + } + return (R_OVERFLOW != + readUnsignedNumber(rString, io_rnPos, io_rnTemp)); + } + else + { + return false; + } + } + } + return true; +} + +template <typename V> +static bool convertDurationHelper(util::Duration& rDuration, V string) +{ + size_t nPos(0); + + bool bIsNegativeDuration(false); + if (!string.empty() && ('-' == string[0])) + { + bIsNegativeDuration = true; + ++nPos; + } + + if (nPos < string.size() + && string[nPos] != 'P' && string[nPos] != 'p') // duration 
must start with "P" + { + return false; + } + + ++nPos; + + /// last read number; -1 == no valid number! always reset after using! + sal_Int32 nTemp(-1); + bool bTimePart(false); // have we read 'T'? + bool bSuccess(false); + sal_Int32 nYears(0); + sal_Int32 nMonths(0); + sal_Int32 nDays(0); + sal_Int32 nHours(0); + sal_Int32 nMinutes(0); + sal_Int32 nSeconds(0); + sal_Int32 nNanoSeconds(0); + + bTimePart = readDurationT(string, nPos); + bSuccess = (R_SUCCESS == readUnsignedNumber(string, nPos, nTemp)); + + if (!bTimePart && bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nYears, 'y', 'Y'); + } + + if (!bTimePart && bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nMonths, 'm', 'M'); + } + + if (!bTimePart && bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nDays, 'd', 'D'); + } + + if (bTimePart) + { + if (-1 == nTemp) // a 'T' must be followed by a component + { + bSuccess = false; + } + + if (bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nHours, 'h', 'H'); + } + + if (bSuccess) + { + bSuccess = readDurationComponent(string, nPos, nTemp, bTimePart, + nMinutes, 'm', 'M'); + } + + // eeek! seconds are icky. + if ((nPos < string.size()) && bSuccess) + { + if (string[nPos] == '.' 
|| + string[nPos] == ',') + { + ++nPos; + if (-1 != nTemp) + { + nSeconds = nTemp; + nTemp = -1; + const sal_Int32 nStart(nPos); + bSuccess = readUnsignedNumberMaxDigits(9, string, nPos, nTemp) == R_SUCCESS; + if ((nPos < string.size()) && bSuccess) + { + if (-1 != nTemp) + { + nNanoSeconds = nTemp; + sal_Int32 nDigits = nPos - nStart; + assert(nDigits >= 0); + for (; nDigits < 9; ++nDigits) + { + nNanoSeconds *= 10; + } + nTemp=-1; + if ('S' == string[nPos] || 's' == string[nPos]) + { + ++nPos; + } + else + { + bSuccess = false; + } + } + else + { + bSuccess = false; + } + } + } + else + { + bSuccess = false; + } + } + else if ('S' == string[nPos] || 's' == string[nPos]) + { + ++nPos; + if (-1 != nTemp) + { + nSeconds = nTemp; + nTemp = -1; + } + else + { + bSuccess = false; + } + } + } + } + + if (nPos != string.size()) // string not processed completely? + { + bSuccess = false; + } + + if (nTemp != -1) // unprocessed number? + { + bSuccess = false; + } + + if (bSuccess) + { + rDuration.Negative = bIsNegativeDuration; + rDuration.Years = static_cast<sal_Int16>(nYears); + rDuration.Months = static_cast<sal_Int16>(nMonths); + rDuration.Days = static_cast<sal_Int16>(nDays); + rDuration.Hours = static_cast<sal_Int16>(nHours); + rDuration.Minutes = static_cast<sal_Int16>(nMinutes); + rDuration.Seconds = static_cast<sal_Int16>(nSeconds); + rDuration.NanoSeconds = nNanoSeconds; + } + + return bSuccess; +} + +/** convert ISO8601 "duration" string to util::Duration */ +bool Converter::convertDuration(util::Duration& rDuration, + std::u16string_view rString) +{ + return convertDurationHelper(rDuration, o3tl::trim(rString)); +} + +/** convert ISO8601 "duration" string to util::Duration */ +bool Converter::convertDuration(util::Duration& rDuration, + std::string_view rString) +{ + return convertDurationHelper(rDuration, o3tl::trim(rString)); +} + +static void +lcl_AppendTimezone(OUStringBuffer & i_rBuffer, int const nOffset) +{ + if (0 == nOffset) + { + 
i_rBuffer.append('Z'); + } + else + { + if (0 < nOffset) + { + i_rBuffer.append('+'); + } + else + { + i_rBuffer.append('-'); + } + const sal_Int32 nHours (abs(nOffset) / 60); + const sal_Int32 nMinutes(abs(nOffset) % 60); + SAL_WARN_IF(nHours > 14 || (nHours == 14 && nMinutes > 0), + "sax", "convertDateTime: timezone overflow"); + if (nHours < 10) + { + i_rBuffer.append('0'); + } + i_rBuffer.append(nHours); + i_rBuffer.append(':'); + if (nMinutes < 10) + { + i_rBuffer.append('0'); + } + i_rBuffer.append(nMinutes); + } +} + +/** convert util::Date to ISO "date" string */ +void Converter::convertDate( + OUStringBuffer& i_rBuffer, + const util::Date& i_rDate, + sal_Int16 const*const pTimeZoneOffset) +{ + const util::DateTime dt(0, 0, 0, 0, + i_rDate.Day, i_rDate.Month, i_rDate.Year, false); + convertDateTime(i_rBuffer, dt, pTimeZoneOffset); +} + +static void convertTime( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime) +{ + if (i_rDateTime.Hours < 10) { + i_rBuffer.append('0'); + } + i_rBuffer.append( OUString::number(static_cast<sal_Int32>(i_rDateTime.Hours)) + ":"); + if (i_rDateTime.Minutes < 10) { + i_rBuffer.append('0'); + } + i_rBuffer.append( OUString::number(static_cast<sal_Int32>(i_rDateTime.Minutes) ) + ":"); + if (i_rDateTime.Seconds < 10) { + i_rBuffer.append('0'); + } + i_rBuffer.append( static_cast<sal_Int32>(i_rDateTime.Seconds) ); + if (i_rDateTime.NanoSeconds > 0) { + OSL_ENSURE(i_rDateTime.NanoSeconds < 1000000000,"NanoSeconds cannot be more than 999 999 999"); + i_rBuffer.append('.'); + std::ostringstream ostr; + ostr.fill('0'); + ostr.width(9); + ostr << i_rDateTime.NanoSeconds; + i_rBuffer.appendAscii(ostr.str().c_str()); + } +} + +static void convertTimeZone( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime, + sal_Int16 const* pTimeZoneOffset) +{ + if (pTimeZoneOffset) + { + lcl_AppendTimezone(i_rBuffer, *pTimeZoneOffset); + } + else if (i_rDateTime.IsUTC) + { + lcl_AppendTimezone(i_rBuffer, 0); + } +} 
+ +/** convert util::DateTime to ISO "time" or "dateTime" string */ +void Converter::convertTimeOrDateTime( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime) +{ + if (i_rDateTime.Year == 0 || + i_rDateTime.Month < 1 || i_rDateTime.Month > 12 || + i_rDateTime.Day < 1 || i_rDateTime.Day > 31) + { + convertTime(i_rBuffer, i_rDateTime); + convertTimeZone(i_rBuffer, i_rDateTime, nullptr); + } + else + { + convertDateTime(i_rBuffer, i_rDateTime, nullptr, true); + } +} + +/** convert util::DateTime to ISO "date" or "dateTime" string */ +void Converter::convertDateTime( + OUStringBuffer& i_rBuffer, + const css::util::DateTime& i_rDateTime, + sal_Int16 const*const pTimeZoneOffset, + bool i_bAddTimeIf0AM ) +{ + const sal_Unicode dash('-'); + const sal_Unicode zero('0'); + + sal_Int32 const nYear(abs(i_rDateTime.Year)); + if (i_rDateTime.Year < 0) { + i_rBuffer.append(dash); // negative + } + if (nYear < 1000) { + i_rBuffer.append(zero); + } + if (nYear < 100) { + i_rBuffer.append(zero); + } + if (nYear < 10) { + i_rBuffer.append(zero); + } + i_rBuffer.append( OUString::number(nYear) + OUStringChar(dash) ); + if( i_rDateTime.Month < 10 ) { + i_rBuffer.append(zero); + } + i_rBuffer.append( OUString::number(i_rDateTime.Month) + OUStringChar(dash) ); + if( i_rDateTime.Day < 10 ) { + i_rBuffer.append(zero); + } + i_rBuffer.append( static_cast<sal_Int32>(i_rDateTime.Day) ); + + if( i_rDateTime.Seconds != 0 || + i_rDateTime.Minutes != 0 || + i_rDateTime.Hours != 0 || + i_bAddTimeIf0AM ) + { + i_rBuffer.append('T'); + convertTime(i_rBuffer, i_rDateTime); + } + + convertTimeZone(i_rBuffer, i_rDateTime, pTimeZoneOffset); +} + +/** convert ISO "date" or "dateTime" string to util::DateTime */ +bool Converter::parseDateTime( util::DateTime& rDateTime, + std::u16string_view rString ) +{ + bool isDateTime; + return parseDateOrDateTime(nullptr, rDateTime, isDateTime, nullptr, + rString); +} + +/** convert ISO "date" or "dateTime" string to util::DateTime */ +bool 
Converter::parseDateTime( util::DateTime& rDateTime, + std::string_view rString ) +{ + bool isDateTime; + return parseDateOrDateTime(nullptr, rDateTime, isDateTime, nullptr, + rString); +} + +static bool lcl_isLeapYear(const sal_uInt32 nYear) +{ + return ((nYear % 4) == 0) + && (((nYear % 100) != 0) || ((nYear % 400) == 0)); +} + +static sal_uInt16 +lcl_MaxDaysPerMonth(const sal_Int32 nMonth, const sal_Int32 nYear) +{ + static const sal_uInt16 s_MaxDaysPerMonth[12] = + { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + assert(0 < nMonth && nMonth <= 12); + if ((2 == nMonth) && lcl_isLeapYear(nYear)) + { + return 29; + } + return s_MaxDaysPerMonth[nMonth - 1]; +} + +static void lcl_ConvertToUTC( + sal_Int16 & o_rYear, sal_uInt16 & o_rMonth, sal_uInt16 & o_rDay, + sal_uInt16 & o_rHours, sal_uInt16 & o_rMinutes, + int const nSourceOffset) +{ + sal_Int16 nOffsetHours(abs(nSourceOffset) / 60); + sal_Int16 const nOffsetMinutes(abs(nSourceOffset) % 60); + o_rMinutes += nOffsetMinutes; + if (nSourceOffset < 0) + { + o_rMinutes += nOffsetMinutes; + if (60 <= o_rMinutes) + { + o_rMinutes -= 60; + ++nOffsetHours; + } + o_rHours += nOffsetHours; + if (o_rHours < 24) + { + return; + } + sal_Int16 nDayAdd(0); + while (24 <= o_rHours) + { + o_rHours -= 24; + ++nDayAdd; + } + if (o_rDay == 0) + { + return; // handle time without date - don't adjust what isn't there + } + o_rDay += nDayAdd; + sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(o_rMonth, o_rYear)); + if (o_rDay <= nDaysInMonth) + { + return; + } + o_rDay -= nDaysInMonth; + ++o_rMonth; + if (o_rMonth <= 12) + { + return; + } + o_rMonth = 1; + ++o_rYear; // works for negative year too + } + else if (0 < nSourceOffset) + { + // argh everything is unsigned + if (o_rMinutes < nOffsetMinutes) + { + o_rMinutes += 60; + ++nOffsetHours; + } + o_rMinutes -= nOffsetMinutes; + sal_Int16 nDaySubtract(0); + while (o_rHours < nOffsetHours) + { + o_rHours += 24; + ++nDaySubtract; + } + o_rHours -= nOffsetHours; + if (o_rDay == 0) + 
{ + return; // handle time without date - don't adjust what isn't there + } + if (nDaySubtract < o_rDay) + { + o_rDay -= nDaySubtract; + return; + } + sal_Int16 const nPrevMonth((o_rMonth == 1) ? 12 : o_rMonth - 1); + sal_Int16 const nDaysInMonth(lcl_MaxDaysPerMonth(nPrevMonth, o_rYear)); + o_rDay += nDaysInMonth; + --o_rMonth; + if (0 == o_rMonth) + { + o_rMonth = 12; + --o_rYear; // works for negative year too + } + o_rDay -= nDaySubtract; + } +} + +template <typename V> +static bool +readDateTimeComponent(V rString, + size_t & io_rnPos, sal_Int32 & o_rnTarget, + const sal_Int32 nMinLength, const bool bExactLength) +{ + const size_t nOldPos(io_rnPos); + sal_Int32 nTemp(0); + if (R_SUCCESS != readUnsignedNumber<V>(rString, io_rnPos, nTemp)) + { + return false; + } + const sal_Int32 nTokenLength(io_rnPos - nOldPos); + if ((nTokenLength < nMinLength) || + (bExactLength && (nTokenLength > nMinLength))) + { + return false; // bad length + } + o_rnTarget = nTemp; + return true; +} + +/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ +template<typename V> +static bool lcl_parseDate( + bool & isNegative, + sal_Int32 & nYear, sal_Int32 & nMonth, sal_Int32 & nDay, + bool & bHaveTime, + size_t & nPos, + V string, + bool const bIgnoreInvalidOrMissingDate) +{ + bool bSuccess = true; + + if (string.size() > nPos) + { + if ('-' == string[nPos]) + { + isNegative = true; + ++nPos; + } + } + + { + // While W3C XMLSchema specifies years with a minimum of 4 digits, be + // lenient in what we accept for years < 1000. One digit is acceptable + // if the remainders match. 
+ bSuccess = readDateTimeComponent<V>(string, nPos, nYear, 1, false); + if (!bIgnoreInvalidOrMissingDate) + { + bSuccess &= (0 < nYear); + } + bSuccess &= (nPos < string.size()); // not last token + } + if (bSuccess && ('-' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent<V>(string, nPos, nMonth, 2, true); + if (!bIgnoreInvalidOrMissingDate) + { + bSuccess &= (0 < nMonth); + } + bSuccess &= (nMonth <= 12); + bSuccess &= (nPos < string.size()); // not last token + } + if (bSuccess && ('-' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nDay, 2, true); + if (!bIgnoreInvalidOrMissingDate) + { + bSuccess &= (0 < nDay); + } + if (nMonth > 0) // not possible to check if month was missing + { + bSuccess &= (nDay <= lcl_MaxDaysPerMonth(nMonth, nYear)); + } + else assert(bIgnoreInvalidOrMissingDate); + } + + if (bSuccess && (nPos < string.size())) + { + if ('T' == string[nPos] || 't' == string[nPos]) // time separator + { + bHaveTime = true; + ++nPos; + } + } + + return bSuccess; +} + +/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ +template <typename V> +static bool lcl_parseDateTime( + util::Date *const pDate, util::DateTime & rDateTime, + bool & rbDateTime, + std::optional<sal_Int16> *const pTimeZoneOffset, + V string, + bool const bIgnoreInvalidOrMissingDate) +{ + bool bSuccess = true; + + string = o3tl::trim(string); + + bool isNegative(false); + sal_Int32 nYear(0); + sal_Int32 nMonth(0); + sal_Int32 nDay(0); + size_t nPos(0); + bool bHaveTime(false); + + if ( !bIgnoreInvalidOrMissingDate + || string.find(':') == V::npos // no time? 
+ || (string.find('-') != V::npos + && string.find('-') < string.find(':'))) + { + bSuccess &= lcl_parseDate<V>(isNegative, nYear, nMonth, nDay, + bHaveTime, nPos, string, bIgnoreInvalidOrMissingDate); + } + else + { + bHaveTime = true; + } + + sal_Int32 nHours(0); + sal_Int32 nMinutes(0); + sal_Int32 nSeconds(0); + sal_Int32 nNanoSeconds(0); + if (bSuccess && bHaveTime) + { + { + bSuccess = readDateTimeComponent(string, nPos, nHours, 2, true); + bSuccess &= (0 <= nHours) && (nHours <= 24); + bSuccess &= (nPos < string.size()); // not last token + } + if (bSuccess && (':' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nMinutes, 2, true); + bSuccess &= (0 <= nMinutes) && (nMinutes < 60); + bSuccess &= (nPos < string.size()); // not last token + } + if (bSuccess && (':' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent(string, nPos, nSeconds, 2, true); + bSuccess &= (0 <= nSeconds) && (nSeconds < 60); + } + if (bSuccess && (nPos < string.size()) && + ('.' 
== string[nPos] || ',' == string[nPos])) // fraction separator + { + ++nPos; + const sal_Int32 nStart(nPos); + sal_Int32 nTemp(0); + if (R_NOTHING == readUnsignedNumberMaxDigits<V>(9, string, nPos, nTemp)) + { + bSuccess = false; + } + if (bSuccess) + { + sal_Int32 nDigits = std::min<sal_Int32>(nPos - nStart, 9); + assert(nDigits > 0); + for (; nDigits < 9; ++nDigits) + { + nTemp *= 10; + } + nNanoSeconds = nTemp; + } + } + + if (bSuccess && (nHours == 24)) + { + if (!((0 == nMinutes) && (0 == nSeconds) && (0 == nNanoSeconds))) + { + bSuccess = false; // only 24:00:00 is valid + } + } + } + + bool bHaveTimezone(false); + bool bHaveTimezonePlus(false); + bool bHaveTimezoneMinus(false); + if (bSuccess && (nPos < string.size())) + { + const sal_Unicode c(string[nPos]); + if ('+' == c) + { + bHaveTimezone = true; + bHaveTimezonePlus = true; + ++nPos; + } + else if ('-' == c) + { + bHaveTimezone = true; + bHaveTimezoneMinus = true; + ++nPos; + } + else if ('Z' == c || 'z' == c) + { + bHaveTimezone = true; + ++nPos; + } + else + { + bSuccess = false; + } + } + sal_Int32 nTimezoneHours(0); + sal_Int32 nTimezoneMinutes(0); + if (bSuccess && (bHaveTimezonePlus || bHaveTimezoneMinus)) + { + bSuccess = readDateTimeComponent<V>( + string, nPos, nTimezoneHours, 2, true); + bSuccess &= (0 <= nTimezoneHours) && (nTimezoneHours <= 14); + bSuccess &= (nPos < string.size()); // not last token + if (bSuccess && (':' != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + + bSuccess = readDateTimeComponent<V>( + string, nPos, nTimezoneMinutes, 2, true); + bSuccess &= (0 <= nTimezoneMinutes) && (nTimezoneMinutes < 60); + } + if (bSuccess && (nTimezoneHours == 14)) + { + if (0 != nTimezoneMinutes) + { + bSuccess = false; // only +-14:00 is valid + } + } + } + + bSuccess &= (nPos == string.size()); // trailing junk? + + if (bSuccess) + { + sal_Int16 const nTimezoneOffset = (bHaveTimezoneMinus ? 
-1 : +1) + * ((nTimezoneHours * 60) + nTimezoneMinutes); + if (!pDate || bHaveTime) // time is optional + { + rDateTime.Year = + (isNegative ? -1 : +1) * static_cast<sal_Int16>(nYear); + rDateTime.Month = static_cast<sal_uInt16>(nMonth); + rDateTime.Day = static_cast<sal_uInt16>(nDay); + rDateTime.Hours = static_cast<sal_uInt16>(nHours); + rDateTime.Minutes = static_cast<sal_uInt16>(nMinutes); + rDateTime.Seconds = static_cast<sal_uInt16>(nSeconds); + rDateTime.NanoSeconds = static_cast<sal_uInt32>(nNanoSeconds); + if (bHaveTimezone) + { + if (pTimeZoneOffset) + { + *pTimeZoneOffset = nTimezoneOffset; + rDateTime.IsUTC = (0 == nTimezoneOffset); + } + else + { + lcl_ConvertToUTC(rDateTime.Year, rDateTime.Month, + rDateTime.Day, rDateTime.Hours, rDateTime.Minutes, + nTimezoneOffset); + rDateTime.IsUTC = true; + } + } + else + { + if (pTimeZoneOffset) + { + pTimeZoneOffset->reset(); + } + rDateTime.IsUTC = false; + } + rbDateTime = bHaveTime; + } + else + { + pDate->Year = + (isNegative ? -1 : +1) * static_cast<sal_Int16>(nYear); + pDate->Month = static_cast<sal_uInt16>(nMonth); + pDate->Day = static_cast<sal_uInt16>(nDay); + if (bHaveTimezone) + { + if (pTimeZoneOffset) + { + *pTimeZoneOffset = nTimezoneOffset; + } + else + { + // a Date cannot be adjusted + SAL_INFO("sax", "dropping timezone"); + } + } + else + { + if (pTimeZoneOffset) + { + pTimeZoneOffset->reset(); + } + } + rbDateTime = false; + } + } + return bSuccess; +} + +/** convert ISO "time" or "dateTime" string to util::DateTime */ +bool Converter::parseTimeOrDateTime( + util::DateTime & rDateTime, + std::u16string_view rString) +{ + bool dummy; + return lcl_parseDateTime( + nullptr, rDateTime, dummy, nullptr, rString, true); +} + +/** convert ISO "time" or "dateTime" string to util::DateTime */ +bool Converter::parseTimeOrDateTime( + util::DateTime & rDateTime, + std::string_view rString) +{ + bool dummy; + return lcl_parseDateTime( + nullptr, rDateTime, dummy, nullptr, rString, true); +} + +/** convert 
ISO "date" or "dateTime" string to util::DateTime or util::Date */ +bool Converter::parseDateOrDateTime( + util::Date *const pDate, util::DateTime & rDateTime, + bool & rbDateTime, + std::optional<sal_Int16> *const pTimeZoneOffset, + std::u16string_view rString ) +{ + return lcl_parseDateTime( + pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false); +} + +/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ +bool Converter::parseDateOrDateTime( + util::Date *const pDate, util::DateTime & rDateTime, + bool & rbDateTime, + std::optional<sal_Int16> *const pTimeZoneOffset, + std::string_view rString ) +{ + return lcl_parseDateTime( + pDate, rDateTime, rbDateTime, pTimeZoneOffset, rString, false); +} + +/** gets the position of the first comma after npos in the string + rStr. Commas inside '"' pairs are not matched */ +sal_Int32 Converter::indexOfComma( std::u16string_view rStr, + sal_Int32 nPos ) +{ + sal_Unicode cQuote = 0; + sal_Int32 nLen = rStr.size(); + for( ; nPos < nLen; nPos++ ) + { + sal_Unicode c = rStr[nPos]; + switch( c ) + { + case u'\'': + if( 0 == cQuote ) + cQuote = c; + else if( '\'' == cQuote ) + cQuote = 0; + break; + + case u'"': + if( 0 == cQuote ) + cQuote = c; + else if( '\"' == cQuote ) + cQuote = 0; + break; + + case u',': + if( 0 == cQuote ) + return nPos; + break; + } + } + + return -1; +} + +double Converter::GetConversionFactor(OUStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) +{ + double fRetval(1.0); + rUnit.setLength(0); + + + if(nSourceUnit != nTargetUnit) + { + const o3tl::Length eFrom = Measure2O3tlUnit(nSourceUnit); + const o3tl::Length eTo = Measure2O3tlUnit(nTargetUnit); + fRetval = o3tl::convert(1.0, eFrom, eTo); + + if (const auto sUnit = Measure2UnitString(nTargetUnit); sUnit.size() > 0) + rUnit.appendAscii(sUnit.data(), sUnit.size()); + } + + return fRetval; +} + +double Converter::GetConversionFactor(OStringBuffer& rUnit, sal_Int16 nSourceUnit, sal_Int16 nTargetUnit) +{ + 
double fRetval(1.0); + rUnit.setLength(0); + + + if(nSourceUnit != nTargetUnit) + { + const o3tl::Length eFrom = Measure2O3tlUnit(nSourceUnit); + const o3tl::Length eTo = Measure2O3tlUnit(nTargetUnit); + fRetval = o3tl::convert(1.0, eFrom, eTo); + + if (const auto sUnit = Measure2UnitString(nTargetUnit); sUnit.size() > 0) + rUnit.append(sUnit.data(), sUnit.size()); + } + + return fRetval; +} + +template<typename V> +static sal_Int16 lcl_GetUnitFromString(V rString, sal_Int16 nDefaultUnit) +{ + sal_Int32 nPos = 0; + sal_Int32 nLen = rString.size(); + sal_Int16 nRetUnit = nDefaultUnit; + + // skip white space + while( nPos < nLen && ' ' == rString[nPos] ) + nPos++; + + // skip negative + if( nPos < nLen && '-' == rString[nPos] ) + nPos++; + + // skip number + while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] ) + nPos++; + + if( nPos < nLen && '.' == rString[nPos] ) + { + nPos++; + while( nPos < nLen && '0' <= rString[nPos] && '9' >= rString[nPos] ) + nPos++; + } + + // skip white space + while( nPos < nLen && ' ' == rString[nPos] ) + nPos++; + + if( nPos < nLen ) + { + switch(rString[nPos]) + { + case '%' : + { + nRetUnit = MeasureUnit::PERCENT; + break; + } + case 'c': + case 'C': + { + if(nPos+1 < nLen && (rString[nPos+1] == 'm' + || rString[nPos+1] == 'M')) + nRetUnit = MeasureUnit::CM; + break; + } + case 'e': + case 'E': + { + // CSS1_EMS or CSS1_EMX later + break; + } + case 'i': + case 'I': + { + if(nPos+1 < nLen && (rString[nPos+1] == 'n' + || rString[nPos+1] == 'N')) + nRetUnit = MeasureUnit::INCH; + break; + } + case 'm': + case 'M': + { + if(nPos+1 < nLen && (rString[nPos+1] == 'm' + || rString[nPos+1] == 'M')) + nRetUnit = MeasureUnit::MM; + break; + } + case 'p': + case 'P': + { + if(nPos+1 < nLen && (rString[nPos+1] == 't' + || rString[nPos+1] == 'T')) + nRetUnit = MeasureUnit::POINT; + if(nPos+1 < nLen && (rString[nPos+1] == 'c' + || rString[nPos+1] == 'C')) + nRetUnit = MeasureUnit::TWIP; + break; + } + } + } + + return nRetUnit; +} + 
+sal_Int16 Converter::GetUnitFromString(std::u16string_view rString, sal_Int16 nDefaultUnit) +{ + return lcl_GetUnitFromString(rString, nDefaultUnit); +} +sal_Int16 Converter::GetUnitFromString(std::string_view rString, sal_Int16 nDefaultUnit) +{ + return lcl_GetUnitFromString(rString, nDefaultUnit); +} + +bool Converter::convertAny(OUStringBuffer& rsValue, + OUStringBuffer& rsType , + const css::uno::Any& rValue) +{ + bool bConverted = false; + + rsValue.setLength(0); + rsType.setLength (0); + + switch (rValue.getValueTypeClass()) + { + case css::uno::TypeClass_BYTE : + case css::uno::TypeClass_SHORT : + case css::uno::TypeClass_UNSIGNED_SHORT : + case css::uno::TypeClass_LONG : + case css::uno::TypeClass_UNSIGNED_LONG : + { + sal_Int32 nTempValue = 0; + if (rValue >>= nTempValue) + { + rsType.append("integer"); + bConverted = true; + rsValue.append(nTempValue); + } + } + break; + + case css::uno::TypeClass_BOOLEAN : + { + bool bTempValue = false; + if (rValue >>= bTempValue) + { + rsType.append("boolean"); + bConverted = true; + ::sax::Converter::convertBool(rsValue, bTempValue); + } + } + break; + + case css::uno::TypeClass_FLOAT : + case css::uno::TypeClass_DOUBLE : + { + double fTempValue = 0.0; + if (rValue >>= fTempValue) + { + rsType.append("float"); + bConverted = true; + ::sax::Converter::convertDouble(rsValue, fTempValue); + } + } + break; + + case css::uno::TypeClass_STRING : + { + OUString sTempValue; + if (rValue >>= sTempValue) + { + rsType.append("string"); + bConverted = true; + rsValue.append(sTempValue); + } + } + break; + + case css::uno::TypeClass_STRUCT : + { + css::util::Date aDate ; + css::util::Time aTime ; + css::util::DateTime aDateTime; + + if (rValue >>= aDate) + { + rsType.append("date"); + bConverted = true; + css::util::DateTime aTempValue; + aTempValue.Day = aDate.Day; + aTempValue.Month = aDate.Month; + aTempValue.Year = aDate.Year; + aTempValue.NanoSeconds = 0; + aTempValue.Seconds = 0; + aTempValue.Minutes = 0; + aTempValue.Hours 
= 0; + ::sax::Converter::convertDateTime(rsValue, aTempValue, nullptr); + } + else + if (rValue >>= aTime) + { + rsType.append("time"); + bConverted = true; + css::util::Duration aTempValue; + aTempValue.Days = 0; + aTempValue.Months = 0; + aTempValue.Years = 0; + aTempValue.NanoSeconds = aTime.NanoSeconds; + aTempValue.Seconds = aTime.Seconds; + aTempValue.Minutes = aTime.Minutes; + aTempValue.Hours = aTime.Hours; + ::sax::Converter::convertDuration(rsValue, aTempValue); + } + else + if (rValue >>= aDateTime) + { + rsType.append("date"); + bConverted = true; + ::sax::Converter::convertDateTime(rsValue, aDateTime, nullptr); + } + } + break; + default: + break; + } + + return bConverted; +} + +void Converter::convertBytesToHexBinary(OUStringBuffer& rBuffer, const void* pBytes, + sal_Int32 nBytes) +{ + rBuffer.setLength(0); + rBuffer.ensureCapacity(nBytes * 2); + auto pChars = static_cast<const unsigned char*>(pBytes); + for (sal_Int32 i = 0; i < nBytes; ++i) + { + sal_Int32 c = *pChars++; + if (c < 16) + rBuffer.append('0'); + rBuffer.append(c, 16); + } +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx new file mode 100644 index 0000000000..45e2e9c5eb --- /dev/null +++ b/sax/source/tools/fastattribs.cxx @@ -0,0 +1,290 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <algorithm> + +#include <com/sun/star/xml/sax/SAXException.hpp> +#include <sax/fastattribs.hxx> +#include <utility> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::xml; +using namespace ::com::sun::star::xml::sax; +namespace sax_fastparser +{ + +// wastage to keep MSVC happy vs. an in-line {} +FastTokenHandlerBase::~FastTokenHandlerBase() +{ +} + +UnknownAttribute::UnknownAttribute( OUString aNamespaceURL, OString aName, OString value ) + : maNamespaceURL(std::move( aNamespaceURL )), maName(std::move( aName )), maValue(std::move( value )) +{ +} + +UnknownAttribute::UnknownAttribute( OString aName, OString value ) + : maName(std::move( aName )), maValue(std::move( value )) +{ +} + +void UnknownAttribute::FillAttribute( Attribute* pAttrib ) const +{ + if( pAttrib ) + { + pAttrib->Name = OStringToOUString( maName, RTL_TEXTENCODING_UTF8 ); + pAttrib->NamespaceURL = maNamespaceURL; + pAttrib->Value = OStringToOUString( maValue, RTL_TEXTENCODING_UTF8 ); + } +} + +FastAttributeList::FastAttributeList( sax_fastparser::FastTokenHandlerBase *pTokenHandler) +: mpTokenHandler( pTokenHandler ) +{ + // random initial size of buffer to store attribute values + mnChunkLength = 58; + mpChunk = static_cast<char *>(malloc( mnChunkLength )); + maAttributeValues.push_back( 0 ); +} + +FastAttributeList::FastAttributeList( const css::uno::Reference< css::xml::sax::XFastAttributeList > & xAttrList ) +{ + const auto& rOther = castToFastAttributeList(xAttrList); + mpTokenHandler = rOther.mpTokenHandler; + mpChunk = static_cast<char *>(malloc( rOther.mnChunkLength )); + mnChunkLength = rOther.mnChunkLength; + memcpy(mpChunk, rOther.mpChunk, rOther.mnChunkLength); + maAttributeValues = 
rOther.maAttributeValues; + maAttributeTokens = rOther.maAttributeTokens; + maUnknownAttributes = rOther.maUnknownAttributes; +} + +css::uno::Reference< ::css::util::XCloneable > FastAttributeList::createClone() +{ + return new FastAttributeList(this); +} + +FastAttributeList::~FastAttributeList() +{ + free( mpChunk ); +} + +void FastAttributeList::clear() +{ + maAttributeTokens.clear(); + maAttributeValues.resize(1); + assert(maAttributeValues[0] == 0); + maUnknownAttributes.clear(); +} + +void FastAttributeList::add( sal_Int32 nToken, std::string_view value ) +{ + assert(nToken != -1); + assert(nToken != 0); + assert(value.length() < SAL_MAX_INT32); // protect against absurd values + maAttributeTokens.push_back( nToken ); + sal_Int32 nWritePosition = maAttributeValues.back(); + maAttributeValues.push_back( maAttributeValues.back() + value.length() + 1 ); + if (maAttributeValues.back() > mnChunkLength) + { + const sal_Int32 newLen = std::max(mnChunkLength * 2, maAttributeValues.back()); + auto p = static_cast<char*>(realloc(mpChunk, newLen)); + if (!p) + throw std::bad_alloc(); + + mnChunkLength = newLen; + mpChunk = p; + + } + memcpy(mpChunk + nWritePosition, value.data(), value.length()); + mpChunk[nWritePosition + value.length()] = '\0'; +} + +void FastAttributeList::add(sal_Int32 nToken, std::u16string_view sValue) +{ + add(nToken, OUStringToOString(sValue, RTL_TEXTENCODING_UTF8)); +} + +void FastAttributeList::addNS( sal_Int32 nNamespaceToken, sal_Int32 nToken, std::string_view rValue ) +{ + sal_Int32 nCombinedToken = (nNamespaceToken << 16) | nToken; + add( nCombinedToken, rValue ); +} + +void FastAttributeList::addNS(sal_Int32 nNamespaceToken, sal_Int32 nToken, + std::u16string_view sValue) +{ + sal_Int32 nCombinedToken = (nNamespaceToken << 16) | nToken; + add(nCombinedToken, sValue); +} + +void FastAttributeList::addUnknown( const OUString& rNamespaceURL, const OString& rName, const OString& value ) +{ + maUnknownAttributes.emplace_back( rNamespaceURL, 
rName, value ); +} + +void FastAttributeList::addUnknown( const OString& rName, const OString& value ) +{ + maUnknownAttributes.emplace_back( rName, value ); +} + +void FastAttributeList::add( const css::uno::Reference<css::xml::sax::XFastAttributeList>& xAttrList ) +{ + const auto& rOther = castToFastAttributeList(xAttrList); + add(rOther); +} + +void FastAttributeList::add( const FastAttributeList& rOther ) +{ + for (size_t i=0; i < rOther.maAttributeTokens.size(); ++i) + add(rOther.maAttributeTokens[i], rOther.getAsViewByIndex(i)); + for (const auto & i : rOther.maUnknownAttributes) + addUnknown(i.maNamespaceURL, i.maName, i.maValue); +} + +// XFastAttributeList +sal_Bool FastAttributeList::hasAttribute( ::sal_Int32 Token ) +{ + for (sal_Int32 i : maAttributeTokens) + if (i == Token) + return true; + + return false; +} + +sal_Int32 FastAttributeList::getValueToken( ::sal_Int32 Token ) +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if (maAttributeTokens[i] == Token) + return FastTokenHandlerBase::getTokenFromChars( + mpTokenHandler, + getAsViewByIndex(i) ); + + throw SAXException("FastAttributeList::getValueToken: unknown token " + OUString::number(Token), nullptr, Any()); +} + +sal_Int32 FastAttributeList::getOptionalValueToken( ::sal_Int32 Token, ::sal_Int32 Default ) +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if (maAttributeTokens[i] == Token) + return FastTokenHandlerBase::getTokenFromChars( + mpTokenHandler, + getAsViewByIndex(i) ); + + return Default; +} + +// performance sensitive shortcuts to avoid allocation ... 
+bool FastAttributeList::getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const +{ + rInt = 0; + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if (maAttributeTokens[i] == nToken) + { + rInt = getAsIntegerByIndex(i); + return true; + } + return false; +} + +bool FastAttributeList::getAsDouble( sal_Int32 nToken, double &rDouble) const +{ + rDouble = 0.0; + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if (maAttributeTokens[i] == nToken) + { + rDouble = o3tl::toDouble(getAsViewByIndex(i)); + return true; + } + return false; +} + +bool FastAttributeList::getAsView( sal_Int32 nToken, std::string_view& rPos ) const +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + { + if (maAttributeTokens[i] != nToken) + continue; + + rPos = getAsViewByIndex(i); + return true; + } + + return false; +} + +OUString FastAttributeList::getValue( ::sal_Int32 Token ) +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if (maAttributeTokens[i] == Token) + return getValueByIndex(i); + + throw SAXException("FastAttributeList::getValue: unknown token " + OUString::number(Token), nullptr, Any()); +} + +OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token ) +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if (maAttributeTokens[i] == Token) + return getValueByIndex(i); + + return OUString(); +} +Sequence< Attribute > FastAttributeList::getUnknownAttributes( ) +{ + auto nSize = maUnknownAttributes.size(); + if (nSize == 0) + return {}; + Sequence< Attribute > aSeq( nSize ); + Attribute* pAttr = aSeq.getArray(); + for( const auto& rAttr : maUnknownAttributes ) + rAttr.FillAttribute( pAttr++ ); + return aSeq; +} +Sequence< FastAttribute > FastAttributeList::getFastAttributes( ) +{ + Sequence< FastAttribute > aSeq( maAttributeTokens.size() ); + FastAttribute* pAttr = aSeq.getArray(); + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + { + pAttr->Token = maAttributeTokens[i]; + pAttr->Value = 
getValueByIndex(i); + pAttr++; + } + return aSeq; +} + +FastAttributeList::FastAttributeIter FastAttributeList::find( sal_Int32 nToken ) const +{ + for (size_t i = 0, n = maAttributeTokens.size(); i < n; ++i) + if( maAttributeTokens[i] == nToken ) + return FastAttributeIter(*this, i); + return end(); +} + +sal_Int32 FastTokenHandlerBase::getTokenFromChars( + const FastTokenHandlerBase *pTokenHandler, + std::string_view token ) +{ + return pTokenHandler->getTokenDirect(token.data(), token.size()); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx new file mode 100644 index 0000000000..0a5c227b72 --- /dev/null +++ b/sax/source/tools/fastserializer.cxx @@ -0,0 +1,844 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include "fastserializer.hxx" + +#include <com/sun/star/xml/sax/FastTokenHandler.hpp> +#include <rtl/math.h> +#include <sal/log.hxx> +#include <comphelper/processfactory.hxx> + +#include <cassert> +#include <optional> +#include <string.h> +#include <string_view> +#include <utility> + +#if OSL_DEBUG_LEVEL > 0 +#include <iostream> +#include <set> +#endif + +using ::std::vector; +using ::com::sun::star::uno::Sequence; +using ::com::sun::star::io::XOutputStream; + +#define HAS_NAMESPACE(x) ((x & 0xffff0000) != 0) +#define NAMESPACE(x) (x >> 16) +#define TOKEN(x) (x & 0xffff) +// number of characters without terminating 0 +#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1) + +const char sClosingBracket[] = ">"; +const char sSlashAndClosingBracket[] = "/>"; +constexpr OString sColon = ":"_ostr; +const char sOpeningBracket[] = "<"; +const char sOpeningBracketAndSlash[] = "</"; +const char sQuote[] = "\""; +const char sEqualSignAndQuote[] = "=\""; +const char sSpace[] = " "; +const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"; + +namespace sax_fastparser { + FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream ) + : mbMarkStackEmpty(true) + , mpDoubleStr(nullptr) + , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE) + , mbXescape(true) + { + rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity); + mxFastTokenHandler = css::xml::sax::FastTokenHandler::create( + ::comphelper::getProcessComponentContext()); + assert(xOutputStream.is()); // cannot do anything without that + maCachedOutputStream.setOutputStream( xOutputStream ); + } + + FastSaxSerializer::~FastSaxSerializer() + { + rtl_string_release(mpDoubleStr); + } + + void FastSaxSerializer::startDocument() + { + writeBytes(sXmlHeader, N_CHARS(sXmlHeader)); + } + + void FastSaxSerializer::write( double value ) + { + rtl_math_doubleToString( + &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G, + 
RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr, + 0, true); + + write(mpDoubleStr->buffer, mpDoubleStr->length); + // and "clear" the string + mpDoubleStr->length = 0; + mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE; + } + + void FastSaxSerializer::write( std::u16string_view sOutput, bool bEscape ) + { + write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape ); + + } + + void FastSaxSerializer::write( std::string_view sOutput, bool bEscape ) + { + write( sOutput.data(), sOutput.length(), bEscape ); + } + + /** Characters not allowed in XML 1.0 + XML 1.1 would exclude only U+0000 + + This assumes that `string` is UTF-8, but which appears to generally be the case: The only + user of this FastSaxSerializer code is FastSerializerHelper, and when its constructor + (sax/source/tools/fshelper.cxx) is called with bWriteHeader being true, it calls + FastSaxSerializer::startDocument, which writes sXmlHeader claiming encoding="UTF-8". The + only place that appears to construct FastSerializerHelper appears to be + XmlFilterBase::openFragmentStreamWithSerializer (oox/source/core/xmlfilterbase.cxx), and it + only passes false for bWriteHeader when the given rMediaType contains "vml" but not "+xml" + (see <https://git.libreoffice.org/core/+/6a11add2c4ea975356cfb7bab02301788c79c904%5E!/> + "XLSX VML Export fixes", stating "Don't write xml headers for vml files"). But lets assume + that even such Vector Markup Language files are written as UTF-8. 
+ */ + template<typename Int> static std::optional<std::pair<unsigned, Int>> invalidChar( + char const * string, Int length, Int index ) + { + assert(index < length); + auto const c = string[index]; + + if (static_cast<unsigned char>(c) >= 0x20 && c != '\xEF') + return {}; + + switch (c) + { + case 0x09: + case 0x0a: + case 0x0d: + return {}; + case '\xEF': // U+FFFE, U+FFFF: + if (length - index >= 3 && string[index + 1] == '\xBF') { + switch (string[index + 2]) { + case '\xBE': + return std::pair(0xFFFE, 3); + case '\xBF': + return std::pair(0xFFFF, 3); + } + } + return {}; + } + return std::pair(static_cast<unsigned char>(c), 1); + } + + static bool isHexDigit( char c ) + { + return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'); + } + + void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape ) + { + if (nLen == -1) + nLen = pStr ? strlen(pStr) : 0; + + if (!bEscape) + { + writeBytes( pStr, nLen ); + return; + } + + bool bGood = true; + const sal_Int32 kXescapeLen = 7; + char bufXescape[kXescapeLen+1]; + sal_Int32 nNextXescape = 0; + for (sal_Int32 i = 0; i < nLen;) + { + char c = pStr[ i ]; + switch( c ) + { + case '<': writeBytes( "<", 4 ); break; + case '>': writeBytes( ">", 4 ); break; + case '&': writeBytes( "&", 5 ); break; + case '\'': writeBytes( "'", 6 ); break; + case '"': writeBytes( """, 6 ); break; + case '\t': +#if 0 + // Seems OOXML prefers the _xHHHH_ escape over the + // entity in *some* cases, apparently in attribute + // values but not in element data. + // Would need to distinguish at a higher level. 
+ if (mbXescape) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast<unsigned int>(static_cast<unsigned char>(c))); + writeBytes( bufXescape, kXescapeLen); + } + else +#endif + { + writeBytes( "	", 4 ); + } + break; + case '\n': +#if 0 + if (mbXescape) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast<unsigned int>(static_cast<unsigned char>(c))); + writeBytes( bufXescape, kXescapeLen); + } + else +#endif + { + writeBytes( " ", 5 ); + } + break; + case '\r': +#if 0 + if (mbXescape) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + static_cast<unsigned int>(static_cast<unsigned char>(c))); + writeBytes( bufXescape, kXescapeLen); + } + else +#endif + { + writeBytes( " ", 5 ); + } + break; + default: + if (mbXescape) + { + char c1, c2, c3, c4; + // Escape characters not valid in XML 1.0 as + // _xHHHH_. A literal "_xHHHH_" has to be + // escaped as _x005F_xHHHH_ (effectively + // escaping the leading '_'). + // See ECMA-376-1:2016 page 3736, + // 22.4.2.4 bstr (Basic String) + // for reference. + if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen && + pStr[i+6] == '_' && + ((pStr[i+1] | 0x20) == 'x') && + isHexDigit( c1 = pStr[i+2] ) && + isHexDigit( c2 = pStr[i+3] ) && + isHexDigit( c3 = pStr[i+4] ) && + isHexDigit( c4 = pStr[i+5] )) + { + // OOXML has the odd habit to write some + // names using this that when re-saving + // should *not* be escaped, specifically + // _x0020_ for blanks in w:xpath values. + if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0')) + { + // When encountering "_x005F_xHHHH_" + // assume that is an already escaped + // sequence that was not unescaped and + // shall be written as is, to not end + // up with "_x005F_x005F_xHHHH_" and + // repeated... 
+ if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' && + i + kXescapeLen <= nLen - 6 && + pStr[i+kXescapeLen+5] == '_' && + ((pStr[i+kXescapeLen+0] | 0x20) == 'x') && + isHexDigit( pStr[i+kXescapeLen+1] ) && + isHexDigit( pStr[i+kXescapeLen+2] ) && + isHexDigit( pStr[i+kXescapeLen+3] ) && + isHexDigit( pStr[i+kXescapeLen+4] )) + { + writeBytes( &c, 1 ); + // Remember this fake escapement. + nNextXescape = i + kXescapeLen + 6; + } + else + { + writeBytes( "_x005F_", kXescapeLen); + // Remember this escapement so in + // _xHHHH_xHHHH_ only the first '_' + // is escaped. + nNextXescape = i + kXescapeLen; + } + break; + } + } + if (auto const inv = invalidChar(pStr, nLen, i)) + { + snprintf( bufXescape, kXescapeLen+1, "_x%04x_", + inv->first); + writeBytes( bufXescape, kXescapeLen); + i += inv->second; + continue; + } + } +#if OSL_DEBUG_LEVEL > 0 + else + { + if (bGood && invalidChar(pStr, nLen, i)) + { + bGood = false; + // The SAL_WARN() for the single character is + // issued in writeBytes(), just gather for the + // SAL_WARN_IF() below. 
+ } + } +#endif + writeBytes( &c, 1 ); + break; + } + ++i; + } + SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'"); + } + + void FastSaxSerializer::endDocument() + { + assert(mbMarkStackEmpty && maMarkStack.empty()); + maCachedOutputStream.flush(); + } + + void FastSaxSerializer::writeId( ::sal_Int32 nElement ) + { + if( HAS_NAMESPACE( nElement ) ) { + auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement))); + assert(Namespace.hasElements()); + writeBytes(Namespace); + writeBytes(sColon.getStr(), sColon.getLength()); + auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement))); + assert(Element.hasElements()); + writeBytes(Element); + } else { + auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement)); + assert(Element.hasElements()); + writeBytes(Element); + } + } + +#ifdef DBG_UTIL + OString FastSaxSerializer::getId( ::sal_Int32 nElement ) + { + if (HAS_NAMESPACE(nElement)) { + Sequence<sal_Int8> const ns( + mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement))); + Sequence<sal_Int8> const name( + mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement))); + return std::string_view( + reinterpret_cast<char const*>(ns.getConstArray()), ns.getLength()) + + sColon + + std::string_view( + reinterpret_cast<char const*>(name.getConstArray()), name.getLength()); + } else { + Sequence<sal_Int8> const name( + mxFastTokenHandler->getUTF8Identifier(nElement)); + return OString(reinterpret_cast<char const*>(name.getConstArray()), name.getLength()); + } + } +#endif + + void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList ) + { + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); + maMarkStack.top()->setCurrentElement( Element ); + } + +#ifdef DBG_UTIL + if (mbMarkStackEmpty) + m_DebugStartedElements.push(Element); + else + maMarkStack.top()->m_DebugStartedElements.push_back(Element); +#endif + + 
writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); + + writeId(Element); + if (pAttrList) + writeFastAttributeList(*pAttrList); + else + writeTokenValueList(); + + writeBytes(sClosingBracket, N_CHARS(sClosingBracket)); + } + + void FastSaxSerializer::endFastElement( ::sal_Int32 Element ) + { +#ifdef DBG_UTIL + // Well-formedness constraint: Element Type Match + if (mbMarkStackEmpty) + { + assert(!m_DebugStartedElements.empty()); + assert(Element == m_DebugStartedElements.top()); + m_DebugStartedElements.pop(); + } + else + { + if (dynamic_cast<ForSort*>(maMarkStack.top().get())) + { + // Sort is always well-formed fragment + assert(!maMarkStack.top()->m_DebugStartedElements.empty()); + } + if (maMarkStack.top()->m_DebugStartedElements.empty()) + { + maMarkStack.top()->m_DebugEndedElements.push_back(Element); + } + else + { + assert(Element == maMarkStack.top()->m_DebugStartedElements.back()); + maMarkStack.top()->m_DebugStartedElements.pop_back(); + } + } +#endif + + writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash)); + + writeId(Element); + + writeBytes(sClosingBracket, N_CHARS(sClosingBracket)); + } + + void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList ) + { + if ( !mbMarkStackEmpty ) + { + maCachedOutputStream.flush(); + maMarkStack.top()->setCurrentElement( Element ); + } + + writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket)); + + writeId(Element); + if (pAttrList) + writeFastAttributeList(*pAttrList); + else + writeTokenValueList(); + + writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket)); + } + + css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const + { + return maCachedOutputStream.getOutputStream(); + } + + void FastSaxSerializer::writeTokenValueList() + { +#ifdef DBG_UTIL + ::std::set<OString> DebugAttributes; +#endif + for (const TokenValue & rTokenValue : maTokenValues) + { + writeBytes(sSpace, N_CHARS(sSpace)); + + 
sal_Int32 nToken = rTokenValue.nToken; + writeId(nToken); + +#ifdef DBG_UTIL + // Well-formedness constraint: Unique Att Spec + OString const nameId(getId(nToken)); + assert(DebugAttributes.find(nameId) == DebugAttributes.end()); + DebugAttributes.insert(nameId); +#endif + + writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote)); + + write(rTokenValue.pValue, -1, true); + + writeBytes(sQuote, N_CHARS(sQuote)); + } + maTokenValues.clear(); + } + + void FastSaxSerializer::writeFastAttributeList(FastAttributeList const & rAttrList) + { +#ifdef DBG_UTIL + ::std::set<OString> DebugAttributes; +#endif + const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens(); + for (size_t j = 0; j < Tokens.size(); j++) + { + writeBytes(sSpace, N_CHARS(sSpace)); + + sal_Int32 nToken = Tokens[j]; + writeId(nToken); + +#ifdef DBG_UTIL + // Well-formedness constraint: Unique Att Spec + OString const nameId(getId(nToken)); + SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId ); + assert(DebugAttributes.find(nameId) == DebugAttributes.end()); + DebugAttributes.insert(nameId); +#endif + + writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote)); + + const char* pAttributeValue = rAttrList.getFastAttributeValue(j); + + // tdf#117274 don't escape the special VML shape type id "#_x0000_t202" + bool bEscape = !(pAttributeValue + && *pAttributeValue != '\0' + && (*pAttributeValue == '#' + ? 
strncmp(pAttributeValue, "#_x0000_t", 9) == 0 + : strncmp(pAttributeValue, "_x0000_t", 8) == 0)); + + write(pAttributeValue, rAttrList.AttributeValueLength(j), bEscape); + + writeBytes(sQuote, N_CHARS(sQuote)); + } + } + + void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder) + { + if (rOrder.hasElements()) + { + auto pSort = std::make_shared<ForSort>(nTag, rOrder); + maMarkStack.push( pSort ); + maCachedOutputStream.setOutput( pSort ); + } + else + { + auto pMerge = std::make_shared<ForMerge>(nTag); + maMarkStack.push( pMerge ); + maCachedOutputStream.setOutput( pMerge ); + } + mbMarkStackEmpty = false; + } + +#ifdef DBG_UTIL + static void lcl_DebugMergeAppend( + std::deque<sal_Int32> & rLeftEndedElements, + std::deque<sal_Int32> & rLeftStartedElements, + std::deque<sal_Int32> & rRightEndedElements, + std::deque<sal_Int32> & rRightStartedElements) + { + while (!rRightEndedElements.empty()) + { + if (rLeftStartedElements.empty()) + { + rLeftEndedElements.push_back(rRightEndedElements.front()); + } + else + { + assert(rLeftStartedElements.back() == rRightEndedElements.front()); + rLeftStartedElements.pop_back(); + } + rRightEndedElements.pop_front(); + } + while (!rRightStartedElements.empty()) + { + rLeftStartedElements.push_back(rRightStartedElements.front()); + rRightStartedElements.pop_front(); + } + } + + static void lcl_DebugMergePrepend( + std::deque<sal_Int32> & rLeftEndedElements, + std::deque<sal_Int32> & rLeftStartedElements, + std::deque<sal_Int32> & rRightEndedElements, + std::deque<sal_Int32> & rRightStartedElements) + { + while (!rLeftStartedElements.empty()) + { + if (rRightEndedElements.empty()) + { + rRightStartedElements.push_front(rLeftStartedElements.back()); + } + else + { + assert(rRightEndedElements.front() == rLeftStartedElements.back()); + rRightEndedElements.pop_front(); + } + rLeftStartedElements.pop_back(); + } + while (!rLeftEndedElements.empty()) + { + rRightEndedElements.push_front(rLeftEndedElements.back()); 
+ rLeftEndedElements.pop_back(); + } + } +#endif + + void FastSaxSerializer::mergeTopMarks( + sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType) + { + SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge"); + assert(!mbMarkStackEmpty); // should never happen + if ( mbMarkStackEmpty ) + return; + + assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!"); + (void) nTag; +#ifdef DBG_UTIL + if (dynamic_cast<ForSort*>(maMarkStack.top().get())) + { + // Sort is always well-formed fragment + assert(maMarkStack.top()->m_DebugStartedElements.empty()); + assert(maMarkStack.top()->m_DebugEndedElements.empty()); + } + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + maMarkStack.top()->m_DebugPostponedEndedElements, + maMarkStack.top()->m_DebugPostponedStartedElements); +#endif + + // flush, so that we get everything in getData() + maCachedOutputStream.flush(); + + if (maMarkStack.size() == 1) + { +#ifdef DBG_UTIL + while (!maMarkStack.top()->m_DebugEndedElements.empty()) + { + assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top()); + maMarkStack.top()->m_DebugEndedElements.pop_front(); + m_DebugStartedElements.pop(); + } + while (!maMarkStack.top()->m_DebugStartedElements.empty()) + { + m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front()); + maMarkStack.top()->m_DebugStartedElements.pop_front(); + } +#endif + Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() ); + maMarkStack.pop(); + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() ); + return; + } + +#ifdef DBG_UTIL + ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements); + ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements); +#endif + const Int8Sequence aMerge( 
maMarkStack.top()->getData() ); + maMarkStack.pop(); +#ifdef DBG_UTIL + switch (eMergeType) + { + case MergeMarks::APPEND: + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + topDebugEndedElements, + topDebugStartedElements); + break; + case MergeMarks::PREPEND: + if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh... + { + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements, + topDebugEndedElements, + topDebugStartedElements); + } + else + { + lcl_DebugMergePrepend( + topDebugEndedElements, + topDebugStartedElements, + maMarkStack.top()->m_DebugEndedElements, + maMarkStack.top()->m_DebugStartedElements); + } + break; + case MergeMarks::POSTPONE: + lcl_DebugMergeAppend( + maMarkStack.top()->m_DebugPostponedEndedElements, + maMarkStack.top()->m_DebugPostponedStartedElements, + topDebugEndedElements, + topDebugStartedElements); + break; + } +#endif + if (maMarkStack.empty()) + { + mbMarkStackEmpty = true; + maCachedOutputStream.resetOutputToStream(); + } + else + { + maCachedOutputStream.setOutput( maMarkStack.top() ); + } + + switch ( eMergeType ) + { + case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break; + case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break; + case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break; + } + } + + void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData ) + { + maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() ); + } + + void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen ) + { +#if OSL_DEBUG_LEVEL > 0 + { + bool bGood = true; + for (size_t i=0; i < nLen;) + { + if (auto const inv = invalidChar(pStr, nLen, i)) + { + bGood = false; + SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" << + std::hex << inv->first); + i += inv->second; + continue; + } + ++i; + } + SAL_WARN_IF( !bGood && 
nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'"); + } +#endif + maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen ); + } + + FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData() + { + merge( maData, maPostponed, true ); + maPostponed.realloc( 0 ); + + return maData; + } + +#if OSL_DEBUG_LEVEL > 0 + void FastSaxSerializer::ForMerge::print( ) + { + std::cerr << "Data: "; + for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ ) + { + std::cerr << maData[i]; + } + + std::cerr << "\nPostponed: "; + for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ ) + { + std::cerr << maPostponed[i]; + } + + std::cerr << "\n"; + } +#endif + + void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat ) + { + merge( maData, rWhat, false ); + } + + void FastSaxSerializer::ForMerge::append( const css::uno::Sequence<sal_Int8> &rWhat ) + { + merge( maData, rWhat, true ); + } + + void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat ) + { + merge( maPostponed, rWhat, true ); + } + + void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend ) + { + sal_Int32 nMergeLen = rMerge.getLength(); + if ( nMergeLen <= 0 ) + return; + + sal_Int32 nTopLen = rTop.getLength(); + + rTop.realloc( nTopLen + nMergeLen ); + if ( bAppend ) + { + // append the rMerge to the rTop + memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen ); + } + else + { + // prepend the rMerge to the rTop + memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen ); + memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen ); + } + } + + void FastSaxSerializer::ForMerge::resetData( ) + { + maData = Int8Sequence(); + } + + void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement ) + { + const auto & rOrder = maOrder; + if( std::find( rOrder.begin(), rOrder.end(), nElement ) != rOrder.end() ) + { + mnCurrentElement = nElement; + 
if ( maData.find( nElement ) == maData.end() ) + maData[ nElement ] = Int8Sequence(); + } + } + + void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat ) + { + append( rWhat ); + } + + void FastSaxSerializer::ForSort::append( const css::uno::Sequence<sal_Int8> &rWhat ) + { + merge( maData[mnCurrentElement], rWhat, true ); + } + + void FastSaxSerializer::ForSort::sort() + { + // Clear the ForMerge data to avoid duplicate items + resetData(); + + // Sort it all + std::map< sal_Int32, Int8Sequence >::iterator iter; + for ( const auto nIndex : std::as_const(maOrder) ) + { + iter = maData.find( nIndex ); + if ( iter != maData.end() ) + ForMerge::append( iter->second ); + } + } + + FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData() + { + sort( ); + return ForMerge::getData(); + } + +#if OSL_DEBUG_LEVEL > 0 + void FastSaxSerializer::ForSort::print( ) + { + for ( const auto& [rElement, rData] : maData ) + { + std::cerr << "pair: " << rElement; + for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i ) + std::cerr << rData[i]; + std::cerr << "\n"; + } + + sort( ); + ForMerge::print(); + } +#endif + +} // namespace sax_fastparser + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx new file mode 100644 index 0000000000..8d97caf305 --- /dev/null +++ b/sax/source/tools/fastserializer.hxx @@ -0,0 +1,257 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX +#define INCLUDED_SAX_SOURCE_TOOLS_FASTSERIALIZER_HXX + +#include <com/sun/star/xml/sax/XFastTokenHandler.hpp> +#include <com/sun/star/io/XOutputStream.hpp> + +#include <sax/fastattribs.hxx> +#include <sax/fshelper.hxx> +#include "CachedOutputStream.hxx" + +#include <stack> +#include <string_view> +#include <map> +#include <memory> + +namespace sax_fastparser { + +struct TokenValue +{ + sal_Int32 nToken; + const char *pValue; + TokenValue(sal_Int32 _nToken, const char *_pValue) : nToken(_nToken), pValue(_pValue) {} +}; +typedef std::vector<TokenValue> TokenValueList; + +/// Receives notification of sax document events to write into an XOutputStream. +class FastSaxSerializer +{ + typedef css::uno::Sequence< ::sal_Int8 > Int8Sequence; + typedef css::uno::Sequence< ::sal_Int32 > Int32Sequence; + +public: + explicit FastSaxSerializer(const css::uno::Reference< css::io::XOutputStream >& xOutputStream); + ~FastSaxSerializer(); + + css::uno::Reference< css::io::XOutputStream > const & getOutputStream() const; + /// called by FSHelper to put data in for writeTokenValueList + TokenValueList& getTokenValueList() { return maTokenValues; } + + /** called by the parser when parsing of an XML stream is started. + */ + void startDocument(); + + /** called by the parser after the last XML element of a stream is processed. + */ + void endDocument(); + + /** receives notification of the beginning of an element. 
+ + @param Element + contains the integer token from the <type>XFastTokenHandler</type> + registered at the <type>XFastParser</type>.<br> + + If the element has a namespace that was registered with the + <type>XFastParser</type>, <param>Element</param> contains the integer + token of the elements local name from the <type>XFastTokenHandler</type> + and the integer token of the namespace combined with an arithmetic + <b>or</b> operation. + + @param pAttrList + Contains a <type>FastAttributeList</type> to access the attributes + from the element. + + */ + void startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr ); + + /** receives notification of the end of a known element. + @see startFastElement + */ + void endFastElement( ::sal_Int32 Element ); + + /** receives notification of the beginning of a single element. + + @param Element + contains the integer token from the <type>XFastTokenHandler</type> + registered at the <type>XFastParser</type>.<br> + + If the element has a namespace that was registered with the + <type>XFastParser</type>, <param>Element</param> contains the integer + token of the elements local name from the <type>XFastTokenHandler</type> + and the integer token of the namespace combined with an arithmetic + <b>or</b> operation. + + @param pAttrList + Contains a <type>FastAttributeList</type> to access the attributes + from the element. 
+ + */ + void singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList = nullptr ); + + // C++ helpers + void writeId( ::sal_Int32 Element ); + OString getId( ::sal_Int32 Element ); + + void write( double value ); + void write( std::u16string_view s, bool bEscape = false ); + void write( std::string_view s, bool bEscape = false ); + void write( const char* pStr, sal_Int32 nLen, bool bEscape = false ); + + // strings with _xHHHH_ are escaped with _x005F unless this is disabled + void setAllowXEscape(bool bSet) { mbXescape = bSet; } + +public: + /** From now on, don't write directly to the stream, but to top of a stack. + + This is to be able to change the order of the data being written. + If you need to write eg. + p, r, rPr, [something], /rPr, t, [text], /t, /r, /p, + but get it in order + p, r, t, [text], /t, rPr, [something], /rPr, /r, /p, + simply do + p, r, mark(), t, [text], /t, mark(), rPr, [something], /rPr, + mergeTopMarks( MergeMarks::PREPEND ), mergeTopMarks( MergeMarks::APPEND ), /r, /p + and you are done. + + @param nTag debugging aid to ensure mark and merge match in LIFO order + */ + void mark(sal_Int32 nTag, const Int32Sequence& rOrder); + + /** Merge 2 topmost marks. + + The possibilities: prepend the top before the second top-most + mark, append it, append it later or ignore; prepending brings the possibility + to switch parts of the output, appending later allows to write some + output in advance. + + Writes the result to the output stream if the mark stack becomes empty + by the operation. + + When the MergeMarks::POSTPONE is specified, the merge happens just + before the next merge. + + @param nTag debugging aid to ensure mark and merge match in LIFO order + + @see mark() + */ + void mergeTopMarks(sal_Int32 nTag, + sax_fastparser::MergeMarks eMergeType); + +private: + /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append. 
+ * Its flush method needs to be called before touching maMarkStack + * to ensure correct order of ForSort methods. + */ + CachedOutputStream maCachedOutputStream; + css::uno::Reference< css::xml::sax::XFastTokenHandler > mxFastTokenHandler; + + class ForMerge : public ForMergeBase + { + Int8Sequence maData; + Int8Sequence maPostponed; + + public: + sal_Int32 const m_Tag; +#ifdef DBG_UTIL + // pending close tags, followed by pending open tags + std::deque<sal_Int32> m_DebugEndedElements; + std::deque<sal_Int32> m_DebugStartedElements; + // ... and another buffer for maPostponed ... + std::deque<sal_Int32> m_DebugPostponedEndedElements; + std::deque<sal_Int32> m_DebugPostponedStartedElements; +#endif + + explicit ForMerge(sal_Int32 const nTag) : m_Tag(nTag) {} + + virtual void setCurrentElement( ::sal_Int32 /*nToken*/ ) {} + virtual Int8Sequence& getData(); +#if OSL_DEBUG_LEVEL > 0 + virtual void print(); +#endif + + virtual void prepend( const Int8Sequence &rWhat ); + virtual void append( const css::uno::Sequence<sal_Int8> &rWhat ) override; + void postpone( const Int8Sequence &rWhat ); + + protected: + void resetData( ); + static void merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend ); + }; + + class ForSort : public ForMerge + { + std::map< ::sal_Int32, Int8Sequence > maData; + sal_Int32 mnCurrentElement; + + Int32Sequence maOrder; + + public: + ForSort(sal_Int32 const nTag, const Int32Sequence& rOrder) + : ForMerge(nTag) + , mnCurrentElement( 0 ) + , maOrder( rOrder ) + {} + + void setCurrentElement( ::sal_Int32 nToken ) override; + + virtual Int8Sequence& getData() override; + +#if OSL_DEBUG_LEVEL > 0 + virtual void print() override; +#endif + + virtual void prepend( const Int8Sequence &rWhat ) override; + virtual void append( const css::uno::Sequence<sal_Int8> &rWhat ) override; + private: + void sort(); + }; + + std::stack< std::shared_ptr< ForMerge > > maMarkStack; + bool mbMarkStackEmpty; + // Would be better to use OStringBuffer instead 
of these two + // but then we couldn't get the rtl_String* member :-( + rtl_String *mpDoubleStr; + sal_Int32 mnDoubleStrCapacity; + TokenValueList maTokenValues; + bool mbXescape; ///< whether to escape invalid XML characters as _xHHHH_ in write(const char*,sal_Int32,true) + + +#ifdef DBG_UTIL + std::stack<sal_Int32> m_DebugStartedElements; +#endif + + void writeTokenValueList(); + void writeFastAttributeList(FastAttributeList const & rAttrList); + + /** Forward the call to the output stream, or write to the stack. + + The latter in the case that we are inside a mark(). + */ + void writeBytes( const css::uno::Sequence< ::sal_Int8 >& aData ); + void writeBytes( const char* pStr, size_t nLen ); +}; + +} // namespace sax_fastparser + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/source/tools/fshelper.cxx b/sax/source/tools/fshelper.cxx new file mode 100644 index 0000000000..f5945d67a9 --- /dev/null +++ b/sax/source/tools/fshelper.cxx @@ -0,0 +1,174 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/log.hxx> +#include <sax/fshelper.hxx> +#include "fastserializer.hxx" + +using namespace ::com::sun::star; +using namespace ::com::sun::star::uno; + +namespace sax_fastparser { + +FastSerializerHelper::FastSerializerHelper(const Reference< io::XOutputStream >& xOutputStream, bool bWriteHeader ) : + mpSerializer(new FastSaxSerializer(xOutputStream)) +{ + if( bWriteHeader ) + startDocument(); +} + +void FastSerializerHelper::startDocument() +{ + mpSerializer->startDocument(); +} + +void FastSerializerHelper::endDocument() +{ + std::unique_ptr<FastSaxSerializer> xSerializer(std::move(mpSerializer)); + xSerializer->endDocument(); +} + +FastSerializerHelper::~FastSerializerHelper() +{ + if (mpSerializer) + { + assert(false && "call endDocument explicitly before dtor to avoid potential exceptions during dtor"); + endDocument(); + } +} + +void FastSerializerHelper::startElement(sal_Int32 elementTokenId) +{ + mpSerializer->startFastElement(elementTokenId); +} +void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const char* value) +{ + mpSerializer->getTokenValueList().emplace_back(attribute, value); +} +void FastSerializerHelper::pushAttributeValue(sal_Int32 attribute, const OString& value) +{ + mpSerializer->getTokenValueList().emplace_back(attribute, value.getStr()); +} +void FastSerializerHelper::singleElement(sal_Int32 elementTokenId) +{ + mpSerializer->singleFastElement(elementTokenId); +} + +void FastSerializerHelper::endElement(sal_Int32 elementTokenId) +{ + mpSerializer->endFastElement(elementTokenId); +} + +void FastSerializerHelper::startElement(sal_Int32 elementTokenId, const rtl::Reference<FastAttributeList>& xAttrList) +{ + assert(xAttrList); + mpSerializer->startFastElement(elementTokenId, xAttrList.get()); +} + +void FastSerializerHelper::singleElement(sal_Int32 elementTokenId, const rtl::Reference<FastAttributeList>& xAttrList) +{ + assert(xAttrList); + mpSerializer->singleFastElement(elementTokenId, xAttrList.get()); +} 
+ +FastSerializerHelper* FastSerializerHelper::write(const char* value) +{ + mpSerializer->write(value, -1); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(const OString& value) +{ + mpSerializer->write(value); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(std::u16string_view value) +{ + mpSerializer->write(value); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(sal_Int32 value) +{ + mpSerializer->write(OString::number(value)); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(sal_Int64 value) +{ + mpSerializer->write(OString::number(value)); + return this; +} + +FastSerializerHelper* FastSerializerHelper::write(double value) +{ + mpSerializer->write(value); + return this; +} + +FastSerializerHelper* FastSerializerHelper::writeEscaped(const char* value) +{ + mpSerializer->write(value, -1, true); + return this; +} + +FastSerializerHelper* FastSerializerHelper::writeEscaped(std::u16string_view value) +{ + if (!value.empty()) + mpSerializer->write(value, true); + return this; +} + +FastSerializerHelper* FastSerializerHelper::writeId(sal_Int32 tokenId) +{ + mpSerializer->writeId(tokenId); + return this; +} + +css::uno::Reference< css::io::XOutputStream > const & FastSerializerHelper::getOutputStream() const +{ + return mpSerializer->getOutputStream(); +} + +void FastSerializerHelper::mark( + sal_Int32 const nTag, const Sequence<sal_Int32>& rOrder) +{ + mpSerializer->mark(nTag, rOrder); +} + +void FastSerializerHelper::mergeTopMarks( + sal_Int32 const nTag, MergeMarks const eMergeType) +{ + mpSerializer->mergeTopMarks(nTag, eMergeType); +} + +rtl::Reference<FastAttributeList> FastSerializerHelper::createAttrList() +{ + return new FastAttributeList( nullptr ); +} + +void FastSerializerHelper::setAllowXEscape(bool bSet) +{ + mpSerializer->setAllowXEscape(bSet); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/test/makefile.mk b/sax/test/makefile.mk 
new file mode 100644 index 0000000000..e2ae1546c0 --- /dev/null +++ b/sax/test/makefile.mk @@ -0,0 +1,58 @@ +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . +# + +PRJ=.. + +PRJNAME=extensions +TARGET=workben +LIBTARGET=NO + +TARGETTYPE=CUI +ENABLE_EXCEPTIONS=TRUE + +# --- Settings ----------------------------------------------------- + +.INCLUDE : settings.mk +# --- Files -------------------------------------------------------- + +.IF "$(BUILD_TYPE)" == "$(BUILD_TYPE:s/DESKTOP//)" + +ALL: +# nothing + +.ENDIF + +# +# std testcomponent +# +APP1TARGET = testcomponent +APP2TARGET = saxdemo + +APP1OBJS = $(OBJ)$/testcomponent.obj +APP1STDLIBS = $(SALLIB) \ + $(CPPULIB)\ + $(CPPUHELPERLIB) + +APP2OBJS = $(OBJ)$/saxdemo.obj +APP2STDLIBS = $(SALLIB) \ + $(CPPULIB) \ + $(CPPUHELPERLIB) + +# --- Targets ------------------------------------------------------ + +.INCLUDE : target.mk diff --git a/sax/test/sax/exports.dxp b/sax/test/sax/exports.dxp new file mode 100644 index 0000000000..86214860d5 --- /dev/null +++ b/sax/test/sax/exports.dxp @@ -0,0 +1,2 @@ +component_getFactory +component_writeInfo diff --git a/sax/test/sax/factory.hxx b/sax/test/sax/factory.hxx new file mode 100644 index 0000000000..688691c9cd --- /dev/null +++ 
b/sax/test/sax/factory.hxx @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SAX_TEST_SAX_FACTORY_HXX +#define INCLUDED_SAX_TEST_SAX_FACTORY_HXX + +#include <rtl/strbuf.hxx> + +namespace sax_test { +Reference< XInterface > SAL_CALL OSaxWriterTest_CreateInstance( + const Reference< XMultiServiceFactory > & rSMgr ) throw ( Exception ); +OUString OSaxWriterTest_getServiceName( ) throw(); +OUString OSaxWriterTest_getImplementationName( ) throw(); +Sequence<OUString> OSaxWriterTest_getSupportedServiceNames( ) throw(); +} +#define BUILD_ERROR(expr, Message)\ + {\ + m_seqErrors.realloc( m_seqErrors.getLength() + 1 ); \ + m_seqExceptions.realloc( m_seqExceptions.getLength() + 1 ); \ + OStringBuffer str(128); \ + str.append( __FILE__ );\ + str.append( " " ); \ + str.append( "(" ); \ + str.append( OString::valueOf( (sal_Int32)__LINE__) );\ + str.append(")\n" );\ + str.append( "[ " ); \ + str.append( #expr ); \ + str.append( " ] : " ); \ + str.append( Message ); \ + m_seqErrors.getArray()[ m_seqErrors.getLength()-1] =\ + OStringToOUString( str.makeStringAndClear() , RTL_TEXTENCODING_ASCII_US ); \ + }\ + 
((void)0) + + +#define WARNING_ASSERT(expr, Message) \ + if( ! (expr) ) { \ + m_seqWarnings.realloc( m_seqErrors.getLength() +1 ); \ + OStringBuffer str(128);\ + str.append( __FILE__);\ + str.append( " "); \ + str.append( "(" ); \ + str.append(OString::valueOf( (sal_Int32)__LINE__)) ;\ + str.append( ")\n");\ + str.append( "[ " ); \ + str.append( #expr ); \ + str.append( " ] : ") ; \ + str.append( Message); \ + m_seqWarnings.getArray()[ m_seqWarnings.getLength()-1] =\ + OStringToOUString( str.makeStringAndClear() , RTL_TEXTENCODING_ASCII_US ); \ + return; \ + }\ + ((void)0) + +#define ERROR_ASSERT(expr, Message) \ + if( ! (expr) ) { \ + BUILD_ERROR(expr, Message );\ + return; \ + }\ + ((void)0) + +#endif // INCLUDED_SAX_TEST_SAX_FACTORY_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/test/sax/makefile.mk b/sax/test/sax/makefile.mk new file mode 100644 index 0000000000..9aa5864ca6 --- /dev/null +++ b/sax/test/sax/makefile.mk @@ -0,0 +1,52 @@ +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This file incorporates work covered by the following license notice: +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to you under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of +# the License at http://www.apache.org/licenses/LICENSE-2.0 . +# +PRJ=..$/.. 
+ +PRJNAME=extensions +TARGET=testsax +USE_DEFFILE=TRUE +ENABLE_EXCEPTIONS=TRUE +# --- Settings ----------------------------------------------------- +.INCLUDE : settings.mk + +# --- Files -------------------------------------------------------- + + +SLOFILES = $(SLO)$/testsax.obj \ + $(SLO)$/testwriter.obj + +SHL1TARGET= $(TARGET) +SHL1IMPLIB= i$(TARGET) + +SHL1STDLIBS= \ + $(SALLIB) \ + $(CPPULIB) \ + $(CPPUHELPERLIB) + + +SHL1LIBS= $(SLB)$/$(TARGET).lib +SHL1DEPN= makefile.mk $(SHL1LIBS) +SHL1DEF= $(MISC)$/$(SHL1TARGET).def + +DEF1NAME= $(SHL1TARGET) +DEF1EXPORTFILE= exports.dxp + + +# --- Targets ------------------------------------------------------ + +.INCLUDE : target.mk diff --git a/sax/test/sax/testsax.cxx b/sax/test/sax/testsax.cxx new file mode 100644 index 0000000000..b6fa70a938 --- /dev/null +++ b/sax/test/sax/testsax.cxx @@ -0,0 +1,794 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <stdio.h> +#include <string.h> + +#include <osl/time.h> +#include <osl/diagnose.h> + +#include <com/sun/star/test/XSimpleTest.hpp> +#include <com/sun/star/io/XOutputStream.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp> + +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implbase.hxx> + +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::test; +using namespace ::com::sun::star::registry; +using namespace ::com::sun::star::io; +using namespace ::com::sun::star::xml::sax; + +// test scenarios + +namespace sax_test { + +class OSaxParserTest : public WeakImplHelper< XSimpleTest > +{ +public: + explicit OSaxParserTest( const Reference < XMultiServiceFactory > & rFactory ) : m_rFactory( rFactory ) + { + } + + virtual void SAL_CALL testInvariant( + const OUString& TestName, + const Reference < XInterface >& TestObject) + throw ( IllegalArgumentException, RuntimeException); + + virtual sal_Int32 SAL_CALL test( + const OUString& TestName, + const Reference < XInterface >& TestObject, + sal_Int32 hTestHandle) + throw ( IllegalArgumentException,RuntimeException); + + virtual sal_Bool SAL_CALL testPassed() throw (RuntimeException); + virtual Sequence< OUString > SAL_CALL getErrors() throw (RuntimeException); + virtual Sequence< Any > SAL_CALL getErrorExceptions() throw (RuntimeException); + virtual Sequence< OUString > SAL_CALL getWarnings() throw (RuntimeException); + +private: + void testSimple( const Reference < XParser > &r ); + void testNamespaces( const Reference < XParser > &r ); + void testFile( const Reference < XParser > &r ); + void testEncoding( const Reference < XParser > &rParser ); + void testPerformance( const Reference < XParser > &rParser ); + + Sequence<Any> m_seqExceptions; + Sequence<OUString> m_seqErrors; + Sequence<OUString> 
m_seqWarnings; + Reference < XMultiServiceFactory > m_rFactory; +}; + +/// @note for external binding +Reference < XInterface > SAL_CALL OSaxParserTest_CreateInstance( const Reference < XMultiServiceFactory > & rSMgr ) throw(Exception) +{ + return getXWeak(new OSaxParserTest( rSMgr )); +} + +OUString OSaxParserTest_getServiceName( ) throw () +{ + return OUString( "test.com.sun.star.xml.sax.Parser" ); +} + +OUString OSaxParserTest_getImplementationName( ) throw () +{ + return OUString( "test.extensions.xml.sax.Parser"); +} + +Sequence<OUString> OSaxParserTest_getSupportedServiceNames( ) throw () +{ + Sequence<OUString> aRet { OSaxParserTest_getImplementationName() }; + return aRet; +} + +void OSaxParserTest::testInvariant( + const OUString& TestName, + const Reference < XInterface >& TestObject ) + throw ( IllegalArgumentException, RuntimeException) +{ + if( OUString( "com.sun.star.xml.sax.Parser") == TestName ) { + Reference < XParser > parser( TestObject , UNO_QUERY ); + + ERROR_ASSERT( parser.is() , "XDataInputStream cannot be queried" ); + } +} + +sal_Int32 OSaxParserTest::test( + const OUString& TestName, + const Reference < XInterface >& TestObject, + sal_Int32 hTestHandle) + throw ( IllegalArgumentException, RuntimeException) +{ + if( OUString( "com.sun.star.xml.sax.Parser") == TestName ) { + try + { + if( 0 == hTestHandle ) { + testInvariant( TestName , TestObject ); + } + else { + Reference < XParser > parser( TestObject , UNO_QUERY ); + + if( 1 == hTestHandle ) { + testSimple( parser ); + } + else if( 2 == hTestHandle ) { + testNamespaces( parser ); + } + else if( 3 == hTestHandle ) { + testEncoding( parser ); + } + else if( 4 == hTestHandle ) { + testFile( parser ); + } + else if( 5 == hTestHandle ) { + testPerformance( parser ); + } + } + } + catch( Exception & e ) + { + OString o = OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US); + BUILD_ERROR( 0 , o.getStr() ); + } + catch( ... 
) + { + BUILD_ERROR( 0 , "unknown exception (Exception is not base class)" ); + } + + hTestHandle ++; + + if( hTestHandle >= 6) { + // all tests finished. + hTestHandle = -1; + } + } + else { + BUILD_ERROR( 0 , "service not supported by test." ); + } + return hTestHandle; +} + +sal_Bool OSaxParserTest::testPassed() throw (RuntimeException) +{ + return m_seqErrors.getLength() == 0; +} + +Sequence< OUString > OSaxParserTest::getErrors() throw (RuntimeException) +{ + return m_seqErrors; +} + +Sequence< Any > OSaxParserTest::getErrorExceptions() throw (RuntimeException) +{ + return m_seqExceptions; +} + +Sequence< OUString > OSaxParserTest::getWarnings() throw (RuntimeException) +{ + return m_seqWarnings; +} + +Reference < XInputStream > createStreamFromSequence( + const Sequence<sal_Int8> seqBytes , + const Reference < XMultiServiceFactory > &xSMgr ) +{ + Reference < XInterface > xOutStreamService = + xSMgr->createInstance("com.sun.star.io.Pipe"); + OSL_ASSERT( xOutStreamService.is() ); + Reference< XOutputStream > rOutStream( xOutStreamService , UNO_QUERY ); + OSL_ASSERT( rOutStream.is() ); + + Reference< XInputStream > rInStream( xOutStreamService , UNO_QUERY ); + OSL_ASSERT( rInStream.is() ); + + rOutStream->writeBytes( seqBytes ); + rOutStream->flush(); + rOutStream->closeOutput(); + + return rInStream; +} + +Reference< XInputStream > createStreamFromFile( + const char *pcFile , + const Reference < XMultiServiceFactory > &xSMgr ) +{ + FILE *f = fopen( pcFile , "rb" ); + Reference< XInputStream > r; + + if( f ) { + fseek( f , 0 , SEEK_END ); + int nLength = ftell( f ); + fseek( f , 0 , SEEK_SET ); + + Sequence<sal_Int8> seqIn(nLength); + fread( seqIn.getArray() , nLength , 1 , f ); + + r = createStreamFromSequence( seqIn , xSMgr ); + fclose( f ); + } + return r; +} + +class TestDocumentHandler : + public WeakImplHelper< XExtendedDocumentHandler , XEntityResolver , XErrorHandler > +{ +public: + TestDocumentHandler( const Reference < XMultiServiceFactory > &r , 
sal_Bool bPrint ) + : m_bPrint(bPrint), m_xSMgr(r) + { + } + + // Error handler + virtual void SAL_CALL error(const Any& aSAXParseException) throw (SAXException, RuntimeException) + { + printf( "Error !\n" ); + throw SAXException( + OUString( "error from error handler") , + Reference < XInterface >() , + aSAXParseException ); + } + virtual void SAL_CALL fatalError(const Any& aSAXParseException) throw (SAXException, RuntimeException) + { + printf( "Fatal Error !\n" ); + } + virtual void SAL_CALL warning(const Any& aSAXParseException) throw (SAXException, RuntimeException) + { + printf( "Warning !\n" ); + } + + // ExtendedDocumentHandler + virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException) + { + m_iLevel = 0; + m_iElementCount = 0; + m_iAttributeCount = 0; + m_iWhitespaceCount =0; + m_iCharCount=0; + if( m_bPrint ) { + printf( "document started\n" ); + } + } + virtual void SAL_CALL endDocument() throw (SAXException, RuntimeException) + { + if( m_bPrint ) { + printf( "document finished\n" ); + printf( "(ElementCount %d),(AttributeCount %d),(WhitespaceCount %d),(CharCount %d)\n", + m_iElementCount, m_iAttributeCount, m_iWhitespaceCount , m_iCharCount ); + } + } + virtual void SAL_CALL startElement(const OUString& aName, + const Reference< XAttributeList > & xAttribs) + throw (SAXException,RuntimeException) + { + if( m_rLocator.is() ) { + if( m_bPrint ) + { + OString o = OUStringToOString( m_rLocator->getSystemId() , RTL_TEXTENCODING_UTF8 ); + printf( "%s(%d):" , o.getStr() , m_rLocator->getLineNumber() ); + } + } + if( m_bPrint ) { + int i; + for( i = 0; i < m_iLevel ; i ++ ) { + printf( " " ); + } + OString o = OUStringToOString(aName , RTL_TEXTENCODING_UTF8 ); + printf( "<%s> " , aName.getStr() ); + + for( i = 0 ; i < xAttribs->getLength() ; i ++ ) + { + OString o1 = OUStringToOString(xAttribs->getNameByIndex( i ), RTL_TEXTENCODING_UTF8 ); + OString o2 = OUStringToOString(xAttribs->getTypeByIndex( i ), RTL_TEXTENCODING_UTF8 ); + OString o3 = 
OUStringToOString(xAttribs->getValueByIndex( i ) , RTL_TEXTENCODING_UTF8 ); + printf( "(%s,%s,'%s')" , o1.getStr(), o2.getStr(), o3.getStr() ); + } + printf( "\n" ); + } + m_iLevel ++; + m_iElementCount ++; + m_iAttributeCount += xAttribs->getLength(); + } + + virtual void SAL_CALL endElement(const OUString& aName) throw (SAXException,RuntimeException) + { + OSL_ASSERT( m_iLevel ); + m_iLevel --; + if( m_bPrint ) { + int i; + for( i = 0; i < m_iLevel ; i ++ ) { + printf( " " ); + } + OString o = OUStringToOString(aName , RTL_TEXTENCODING_UTF8 ); + printf( "</%s>\n" , o.getStr() ); + } + } + + virtual void SAL_CALL characters(const OUString& aChars) throw (SAXException,RuntimeException) + { + if( m_bPrint ) { + int i; + for( i = 0; i < m_iLevel ; i ++ ) { + printf( " " ); + } + OString o = OUStringToOString(aChars , RTL_TEXTENCODING_UTF8 ); + printf( "%s\n" , o.getStr() ); + } + m_iCharCount += aChars.getLength(); + } + + virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) throw (SAXException,RuntimeException) + { + m_iWhitespaceCount += aWhitespaces.getLength(); + } + + virtual void SAL_CALL processingInstruction(const OUString& aTarget, const OUString& aData) throw (SAXException,RuntimeException) + { + if( m_bPrint ) + { + OString o1 = OUStringToOString(aTarget, RTL_TEXTENCODING_UTF8 ); + OString o2 = OUStringToOString(aData, RTL_TEXTENCODING_UTF8 ); + printf( "PI : %s,%s\n" , o1.getStr() , o2.getStr() ); + } + } + + virtual void SAL_CALL setDocumentLocator(const Reference< XLocator> & xLocator) + throw (SAXException,RuntimeException) + { + m_rLocator = xLocator; + } + + virtual InputSource SAL_CALL resolveEntity( + const OUString& sPublicId, + const OUString& sSystemId) + throw (SAXException,RuntimeException) + { + InputSource source; + source.sSystemId = sSystemId; + source.sPublicId = sPublicId; + + source.aInputStream = createStreamFromFile( + OUStringToOString( sSystemId , RTL_TEXTENCODING_ASCII_US) , m_xSMgr ); + + return source; + } + + 
virtual void SAL_CALL startCDATA() throw (SAXException,RuntimeException) + { + if( m_bPrint ) { + printf( "CDataStart :\n" ); + } + } + virtual void SAL_CALL endCDATA() throw (SAXException,RuntimeException) + { + if( m_bPrint ) { + printf( "CEndStart :\n" ); + } + } + virtual void SAL_CALL comment(const OUString& sComment) throw (SAXException,RuntimeException) + { + if( m_bPrint ) { + OString o1 = OUStringToOString(sComment, RTL_TEXTENCODING_UTF8 ); + printf( "<!--%s-->\n" , o1.getStr() ); + } + } + virtual void SAL_CALL unknown(const OUString& sString) throw (SAXException,RuntimeException) + { + if( m_bPrint ) + { + OString o1 = OUStringToOString(sString, RTL_TEXTENCODING_UTF8 ); + printf( "UNKNOWN : {%s}\n" , o1.getStr() ); + } + } + + virtual void SAL_CALL allowLineBreak() throw (SAXException, RuntimeException ) + { + + } + + int m_iLevel; + int m_iElementCount; + int m_iAttributeCount; + int m_iWhitespaceCount; + int m_iCharCount; + sal_Bool m_bPrint; + + Reference < XMultiServiceFactory > m_xSMgr; + Reference < XLocator > m_rLocator; +}; + +void OSaxParserTest::testSimple( const Reference < XParser > &rParser ) +{ + char TestString[] = "<!DOCTYPE personnel [\n" + "<!ENTITY testInternal \"internal Test!\">\n" + "<!ENTITY test SYSTEM \"external_entity.xml\">\n" + "]>\n" + "<personnel>\n" + "<person> fjklsfdklsdfkl\n" + "fjklsfdklsdfkl\n" + "<?testpi pidata?>\n" + "&testInternal;\n" + "<HUHU x='5' y='kjfd'> blahuhu\n" + "<HI> blahi\n" + " <![CDATA[<greeting>Hello, '+1+12world!</greeting>]]>\n" + " <!-- huhu <jdk> -->\n" + "<?testpi pidata?>\n" + "</HI>\n" + "aus XMLTest\n" + "</HUHU>\n" + "</person>\n" + "</personnel>\n\n\n"; + + Sequence< sal_Int8> seqBytes( strlen( TestString ) ); + memcpy( seqBytes.getArray() , TestString , strlen( TestString ) ); + + Reference< XInputStream > rInStream; + OUString sInput; + rInStream = createStreamFromSequence( seqBytes , m_rFactory ); + sInput = "internal"; + + if( rParser.is() ) { + InputSource source; + + 
source.aInputStream = rInStream; + source.sSystemId = sInput; + + TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False ); + Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY ); + Reference< XEntityResolver > + rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY ); + + rParser->setDocumentHandler( rDocHandler ); + rParser->setEntityResolver( rEntityResolver ); + + try + { + rParser->parseStream( source ); + ERROR_ASSERT( pDocHandler->m_iElementCount == 4 , "wrong element count" ); + ERROR_ASSERT( pDocHandler->m_iAttributeCount == 2 , "wrong attribute count" ); + ERROR_ASSERT( pDocHandler->m_iCharCount == 130 , "wrong char count" ); + ERROR_ASSERT( pDocHandler->m_iWhitespaceCount == 0, "wrong whitespace count" ); + } + catch( SAXParseException & e ) + { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + BUILD_ERROR( 1 , o1.getStr() ); + } + catch( SAXException & e ) + { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + BUILD_ERROR( 1 , o1.getStr() ); + } + catch( Exception & e ) + { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + BUILD_ERROR( 1 , o1.getStr() ); + } + catch( ... 
) + { + BUILD_ERROR( 1 , "unknown exception" ); + } + } +} + +void OSaxParserTest::testNamespaces( const Reference < XParser > &rParser ) +{ + + char TestString[] = + "<?xml version='1.0'?>\n" + "<!-- all elements here are explicitly in the HTML namespace -->\n" + "<html:html xmlns:html='http://www.w3.org/TR/REC-html40'>\n" + "<html:head><html:title>Frobnostication</html:title></html:head>\n" + "<html:body><html:p>Moved to \n" + "<html:a href='http://frob.com'>here.</html:a></html:p></html:body>\n" + "</html:html>\n"; + + Sequence<sal_Int8> seqBytes( strlen( TestString ) ); + memcpy( seqBytes.getArray() , TestString , strlen( TestString ) ); + + Reference< XInputStream > rInStream; + OUString sInput; + + rInStream = createStreamFromSequence( seqBytes , m_rFactory ); + sInput = "internal"; + + if( rParser.is() ) { + InputSource source; + + source.aInputStream = rInStream; + source.sSystemId = sInput; + + TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False ); + Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY ); + Reference< XEntityResolver > rEntityResolver( + (XEntityResolver *) pDocHandler , UNO_QUERY ); + + rParser->setDocumentHandler( rDocHandler ); + rParser->setEntityResolver( rEntityResolver ); + + try + { + rParser->parseStream( source ); + ERROR_ASSERT( pDocHandler->m_iElementCount == 6 , "wrong element count" ); + ERROR_ASSERT( pDocHandler->m_iAttributeCount == 2 , "wrong attribute count" ); + ERROR_ASSERT( pDocHandler->m_iCharCount == 33, "wrong char count" ); + ERROR_ASSERT( pDocHandler->m_iWhitespaceCount == 0 , "wrong whitespace count" ); + } + catch( Exception & e ) { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + BUILD_ERROR( 1 , o1.getStr() ); + } + catch( ... 
) + { + BUILD_ERROR( 1 , "unknown exception" ); + } + } +} + +void OSaxParserTest::testEncoding( const Reference < XParser > &rParser ) +{ + char TestString[] = + "<?xml version='1.0' encoding=\"iso-8859-1\"?>\n" + "<!-- all elements here are explicitly in the HTML namespace -->\n" + "<html:html xmlns:html='http://www.w3.org/TR/REC-html40'>\n" + "<html:head><html:title>Frobnostication</html:title></html:head>\n" + "<html:body><html:p>Moved to \337\n" + "<html:a href='http://frob.com'>here.</html:a></html:p></html:body>\n" + "</html:html>\n"; + + Sequence<sal_Int8> seqBytes( strlen( TestString ) ); + memcpy( seqBytes.getArray() , TestString , strlen( TestString ) ); + + Reference< XInputStream > rInStream; + OUString sInput; + + rInStream = createStreamFromSequence( seqBytes , m_rFactory ); + sInput = "internal"; + + if( rParser.is() ) { + InputSource source; + + source.aInputStream = rInStream; + source.sSystemId = sInput; + + TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False ); + Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY ); + Reference< XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY ); + + rParser->setDocumentHandler( rDocHandler ); + rParser->setEntityResolver( rEntityResolver ); + try + { + rParser->parseStream( source ); + } + catch( Exception & e ) + { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + BUILD_ERROR( 1 , o1.getStr() ); + } + catch ( ... 
) + { + BUILD_ERROR( 1 , "unknown exception" ); + } + } +} + +void OSaxParserTest::testFile( const Reference < XParser > & rParser ) +{ + + Reference< XInputStream > rInStream = createStreamFromFile( "testsax.xml" , m_rFactory ); + OUString sInput = "testsax.xml"; + + if( rParser.is() && rInStream.is() ) { + InputSource source; + + source.aInputStream = rInStream; + source.sSystemId = sInput; + + TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_True ); + Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY ); + Reference < XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY ); + Reference < XErrorHandler > rErrorHandler( ( XErrorHandler * )pDocHandler , UNO_QUERY ); + + rParser->setDocumentHandler( rDocHandler ); + rParser->setEntityResolver( rEntityResolver ); + rParser->setErrorHandler( rErrorHandler ); + + try + { + rParser->parseStream( source ); + } + catch( SAXParseException & e ) { + Any any; + any <<= e; + + while(true) { + SAXParseException *pEx; + if( any.getValueType() == cppu::UnoType<decltype(e)>::get() ) { + pEx = ( SAXParseException * ) any.getValue(); + OString o1 = OUStringToOString(pEx->Message, RTL_TEXTENCODING_UTF8 ); + printf( "%s\n" , o1.getStr() ); + any = pEx->WrappedException; + } + else { + break; + } + } + } + catch( SAXException & e ) + { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + BUILD_ERROR( 1 , o1.getStr() ); + + } + catch( Exception & e ) { + printf( "normal exception ! %s\n", e.Message ); + } + catch ( ... 
) + { + printf( "any exception !!!!\n" ); + } + } +} + +void OSaxParserTest::testPerformance( const Reference < XParser > & rParser ) +{ + Reference < XInputStream > rInStream = + createStreamFromFile( "testPerformance.xml" , m_rFactory ); + OUString sInput = "testperformance.xml"; + + if( rParser.is() && rInStream.is() ) { + InputSource source; + + source.aInputStream = rInStream; + source.sSystemId = sInput; + + TestDocumentHandler *pDocHandler = new TestDocumentHandler( m_rFactory , sal_False ); + Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler , UNO_QUERY ); + Reference < XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler , UNO_QUERY ); + Reference < XErrorHandler > rErrorHandler( ( XErrorHandler * )pDocHandler , UNO_QUERY ); + + rParser->setDocumentHandler( rDocHandler ); + rParser->setEntityResolver( rEntityResolver ); + rParser->setErrorHandler( rErrorHandler ); + + try + { + TimeValue aStartTime, aEndTime; + osl_getSystemTime( &aStartTime ); + rParser->parseStream( source ); + osl_getSystemTime( &aEndTime ); + + double fStart = (double)aStartTime.Seconds + ((double)aStartTime.Nanosec / 1000000000.0); + double fEnd = (double)aEndTime.Seconds + ((double)aEndTime.Nanosec / 1000000000.0); + + printf( "Performance reading : %g s\n" , fEnd - fStart ); + + } + catch( SAXParseException &e ) { + Any any; + any <<= e; + while(true) { + if( any.getValueType() == cppu::UnoType<decltype(e)>::get() ) { + SAXParseException ex; + any >>= ex; + OString o = OUStringToOString( ex.Message , RTL_TEXTENCODING_ASCII_US ); + printf( "%s\n" , o.getStr() ); + any <<= ex.WrappedException; + } + else { + break; + } + } + } + catch( SAXException &e ) { + OString o = OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US ); + printf( "%s\n" , o.getStr() ); + + } + catch( ... 
) + { + printf( "any exception !!!!\n" ); + } + } +} +} // namespace + +using namespace sax_test; + +extern "C" +{ + +sal_Bool SAL_CALL component_writeInfo( + void * pServiceManager, void * pRegistryKey ) +{ + if (pRegistryKey) + { + try + { + Reference< XRegistryKey > xKey( + reinterpret_cast< XRegistryKey * >( pRegistryKey ) ); + + OUString str = + OUString( "/" ) + + OSaxParserTest_getImplementationName() + + OUString( "/UNO/SERVICES" ); + Reference< XRegistryKey > xNewKey = xKey->createKey( str ); + xNewKey->createKey( OSaxParserTest_getServiceName() ); + + str = + OUString( "/" ) + + OSaxWriterTest_getImplementationName() + + OUString( "/UNO/SERVICES" ); + + xNewKey = xKey->createKey( str ); + xNewKey->createKey( OSaxWriterTest_getServiceName() ); + + return sal_True; + } + catch (InvalidRegistryException &) + { + OSL_FAIL( "### InvalidRegistryException!" ); + } + } + return sal_False; +} + +SAL_DLLPUBLIC_EXPORT void * SAL_CALL component_getFactory( + const char * pImplName, void * pServiceManager, void * pRegistryKey ) +{ + void * pRet = 0; + + if (pServiceManager ) + { + Reference< XSingleServiceFactory > xRet; + Reference< XMultiServiceFactory > xSMgr = + reinterpret_cast< XMultiServiceFactory * > ( pServiceManager ); + + OUString aImplementationName = OUString::createFromAscii( pImplName ); + + + if (aImplementationName == OSaxWriterTest_getImplementationName() ) + { + xRet = createSingleFactory( xSMgr, aImplementationName, + OSaxWriterTest_CreateInstance, + OSaxWriterTest_getSupportedServiceNames() ); + } + else if (aImplementationName == OSaxParserTest_getImplementationName() ) + { + xRet = createSingleFactory( xSMgr, aImplementationName, + OSaxParserTest_CreateInstance, + OSaxParserTest_getSupportedServiceNames() ); + } + if (xRet.is()) + { + xRet->acquire(); + pRet = xRet.get(); + } + } + return pRet; +} + +} // extern C + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/test/sax/testwriter.cxx b/sax/test/sax/testwriter.cxx new 
file mode 100644 index 0000000000..2a5d3706d4 --- /dev/null +++ b/sax/test/sax/testwriter.cxx @@ -0,0 +1,662 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#include <vector> +#include <stdio.h> + +#include <com/sun/star/test/XSimpleTest.hpp> +#include <com/sun/star/lang/XMultiServiceFactory.hpp> + +#include <com/sun/star/io/XActiveDataSource.hpp> +#include <com/sun/star/io/XOutputStream.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp> + +#include <osl/time.h> + +#include <cppuhelper/factory.hxx> +#include <cppuhelper/implbase.hxx> + + +using namespace ::std; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::test; +using namespace ::com::sun::star::registry; +using namespace ::com::sun::star::io; +using namespace ::com::sun::star::xml::sax; + +namespace sax_test { + +class OFileWriter : + public WeakImplHelper< XOutputStream > +{ +public: + explicit OFileWriter( char *pcFile ) { strncpy( m_pcFile, pcFile, 256 - 1 ); m_f = 0; } + + +public: + virtual void SAL_CALL writeBytes(const Sequence< sal_Int8 >& aData) + throw (NotConnectedException, BufferSizeExceededException, RuntimeException); + virtual void SAL_CALL flush() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException); + virtual void SAL_CALL closeOutput() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException); +private: + char m_pcFile[256]; + FILE *m_f; +}; + + +void OFileWriter::writeBytes(const Sequence< sal_Int8 >& aData) + throw (NotConnectedException, BufferSizeExceededException, RuntimeException) +{ + if( ! 
m_f ) { + m_f = fopen( m_pcFile , "w" ); + } + + fwrite( aData.getConstArray() , 1 , aData.getLength() , m_f ); +} + + +void OFileWriter::flush() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException) +{ + fflush( m_f ); +} + +void OFileWriter::closeOutput() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException) +{ + fclose( m_f ); + m_f = 0; +} + + +class OSaxWriterTest : + public WeakImplHelper< XSimpleTest > +{ +public: + explicit OSaxWriterTest( const Reference < XMultiServiceFactory > & rFactory ) : m_rFactory( rFactory ) + { + + } + ~OSaxWriterTest() {} + + +public: + virtual void SAL_CALL testInvariant( + const OUString& TestName, + const Reference < XInterface >& TestObject) + throw ( IllegalArgumentException, + RuntimeException); + + virtual sal_Int32 SAL_CALL test( + const OUString& TestName, + const Reference < XInterface >& TestObject, + sal_Int32 hTestHandle) + throw ( IllegalArgumentException,RuntimeException); + + virtual sal_Bool SAL_CALL testPassed() + throw ( RuntimeException); + virtual Sequence< OUString > SAL_CALL getErrors() throw (RuntimeException); + virtual Sequence< Any > SAL_CALL getErrorExceptions() throw (RuntimeException); + virtual Sequence< OUString > SAL_CALL getWarnings() throw (RuntimeException); + +private: + void testSimple( const Reference< XExtendedDocumentHandler > &r ); + void testExceptions( const Reference< XExtendedDocumentHandler > &r ); + void testDTD( const Reference< XExtendedDocumentHandler > &r ); + void testPerformance( const Reference< XExtendedDocumentHandler > &r ); + void writeParagraph( const Reference< XExtendedDocumentHandler > &r , const OUString & s); + +private: + Sequence<Any> m_seqExceptions; + Sequence<OUString> m_seqErrors; + Sequence<OUString> m_seqWarnings; + Reference < XMultiServiceFactory > m_rFactory; + +}; + + +/*---------------------------------------- +* +* Attributelist implementation +* +*----------------------------------------*/ +struct 
AttributeListImpl_impl; +class AttributeListImpl : public WeakImplHelper< XAttributeList > +{ +public: + AttributeListImpl(); + AttributeListImpl( const AttributeListImpl & ); + ~AttributeListImpl(); + +public: + virtual sal_Int16 SAL_CALL getLength() throw (RuntimeException); + virtual OUString SAL_CALL getNameByIndex(sal_Int16 i) throw (RuntimeException); + virtual OUString SAL_CALL getTypeByIndex(sal_Int16 i) throw (RuntimeException); + virtual OUString SAL_CALL getTypeByName(const OUString& aName) throw (RuntimeException); + virtual OUString SAL_CALL getValueByIndex(sal_Int16 i) throw (RuntimeException); + virtual OUString SAL_CALL getValueByName(const OUString& aName) throw (RuntimeException); + +public: + void addAttribute( const OUString &sName , + const OUString &sType , + const OUString &sValue ); + void clear(); + +private: + struct AttributeListImpl_impl *m_pImpl; +}; + + +struct TagAttribute +{ + TagAttribute(){} + TagAttribute( const OUString &sName, + const OUString &sType , + const OUString &sValue ) + { + sName = sName; + sType = sType; + sValue = sValue; + } + + OUString sName; + OUString sType; + OUString sValue; +}; + +struct AttributeListImpl_impl +{ + AttributeListImpl_impl() + { + // performance improvement during adding + vecAttribute.reserve(20); + } + vector<struct TagAttribute> vecAttribute; +}; + + +sal_Int16 AttributeListImpl::getLength() throw (RuntimeException) +{ + return m_pImpl->vecAttribute.size(); +} + + +AttributeListImpl::AttributeListImpl( const AttributeListImpl &r ) +{ + m_pImpl = new AttributeListImpl_impl; + *m_pImpl = *(r.m_pImpl); +} + +OUString AttributeListImpl::getNameByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( i < m_pImpl->vecAttribute.size() ) { + return m_pImpl->vecAttribute[i].sName; + } + return OUString(); +} + + +OUString AttributeListImpl::getTypeByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( i < m_pImpl->vecAttribute.size() ) { + return m_pImpl->vecAttribute[i].sType; + } + return 
OUString(); +} + +OUString AttributeListImpl::getValueByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( i < m_pImpl->vecAttribute.size() ) { + return m_pImpl->vecAttribute[i].sValue; + } + return OUString(); + +} + +OUString AttributeListImpl::getTypeByName( const OUString& sName ) throw (RuntimeException) +{ + auto ii = std::find_if(m_pImpl->vecAttribute.begin(), m_pImpl->vecAttribute.end(), + [&sName](const struct TagAttribute& rAttr) { return rAttr.sName == sName; }); + if (ii != m_pImpl->vecAttribute.end()) + return (*ii).sType; + return OUString(); +} + +OUString AttributeListImpl::getValueByName(const OUString& sName) throw (RuntimeException) +{ + auto ii = std::find_if(m_pImpl->vecAttribute.begin(), m_pImpl->vecAttribute.end(), + [&sName](const struct TagAttribute& rAttr) { return rAttr.sName == sName; }); + if (ii != m_pImpl->vecAttribute.end()) + return (*ii).sValue; + return OUString(); +} + + +AttributeListImpl::AttributeListImpl() +{ + m_pImpl = new AttributeListImpl_impl; +} + + +AttributeListImpl::~AttributeListImpl() +{ + delete m_pImpl; +} + + +void AttributeListImpl::addAttribute( const OUString &sName , + const OUString &sType , + const OUString &sValue ) +{ + m_pImpl->vecAttribute.push_back( TagAttribute( sName , sType , sValue ) ); +} + +void AttributeListImpl::clear() +{ + m_pImpl->vecAttribute.clear(); + +} + + +/** +* for external binding +* +* +**/ +Reference < XInterface > SAL_CALL OSaxWriterTest_CreateInstance( const Reference < XMultiServiceFactory > & rSMgr ) throw (Exception) +{ + OSaxWriterTest *p = new OSaxWriterTest( rSMgr ); + Reference < XInterface > xService = *p; + return xService; +} + +OUString OSaxWriterTest_getServiceName( ) throw () +{ + return OUString( "test.com.sun.star.xml.sax.Writer"); +} + +OUString OSaxWriterTest_getImplementationName( ) throw () +{ + return OUString( "test.extensions.xml.sax.Writer"); +} + +Sequence<OUString> OSaxWriterTest_getSupportedServiceNames( ) throw () +{ + Sequence<OUString> aRet { 
OSaxWriterTest_getImplementationName( ) }; + return aRet; +} + + +void OSaxWriterTest::testInvariant( const OUString& TestName, + const Reference < XInterface >& TestObject ) + throw ( IllegalArgumentException, RuntimeException) +{ + if( OUString("com.sun.star.xml.sax.Writer") == TestName ) { + Reference< XDocumentHandler > doc( TestObject , UNO_QUERY ); + Reference< XExtendedDocumentHandler > ext( TestObject , UNO_QUERY ); + Reference< XActiveDataSource > source( TestObject , UNO_QUERY ); + + ERROR_ASSERT( doc.is() , "XDocumentHandler cannot be queried" ); + ERROR_ASSERT( ext.is() , "XExtendedDocumentHandler cannot be queried" ); + ERROR_ASSERT( source.is() , "XActiveDataSource cannot be queried" ); + } + else { + BUILD_ERROR( 0 , "wrong test" ); + } +} + + +sal_Int32 OSaxWriterTest::test( + const OUString& TestName, + const Reference < XInterface >& TestObject, + sal_Int32 hTestHandle) + throw ( IllegalArgumentException,RuntimeException) +{ + if( OUString( "com.sun.star.xml.sax.Writer") == TestName ) + { + try + { + if( 0 == hTestHandle ) + { + testInvariant( TestName , TestObject ); + } + else + { + Reference< XExtendedDocumentHandler > writer( TestObject , UNO_QUERY ); + + if( 1 == hTestHandle ) { + testSimple( writer ); + } + else if( 2 == hTestHandle ) { + testExceptions( writer ); + } + else if( 3 == hTestHandle ) { + testDTD( writer ); + } + else if( 4 == hTestHandle ) { + testPerformance( writer ); + } + } + } + catch( Exception & e ) { + OString o = OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US ); + BUILD_ERROR( 0 , o.getStr() ); + } + catch( ... ) + { + BUILD_ERROR( 0 , "unknown exception (Exception is not base class)" ); + } + + hTestHandle ++; + + if( hTestHandle >= 5) { + // all tests finished. + hTestHandle = -1; + } + } + else { + BUILD_ERROR( 0 , "service not supported by test." 
); + } + return hTestHandle; +} + + +sal_Bool OSaxWriterTest::testPassed() throw (RuntimeException) +{ + return m_seqErrors.getLength() == 0; +} + + +Sequence< OUString > OSaxWriterTest::getErrors() throw (RuntimeException) +{ + return m_seqErrors; +} + + +Sequence< Any > OSaxWriterTest::getErrorExceptions() throw (RuntimeException) +{ + return m_seqExceptions; +} + + +Sequence< OUString > OSaxWriterTest::getWarnings() throw (RuntimeException) +{ + return m_seqWarnings; +} + +void OSaxWriterTest::writeParagraph( + const Reference< XExtendedDocumentHandler > &r , + const OUString & s) +{ + int nMax = s.getLength(); + int nStart = 0; + + Sequence<sal_uInt16> seq( s.getLength() ); + memcpy( seq.getArray() , s.getStr() , s.getLength() * sizeof( sal_uInt16 ) ); + + for( int n = 1 ; n < nMax ; n++ ){ + if( 32 == seq.getArray()[n] ) { + r->allowLineBreak(); + r->characters( s.copy( nStart , n - nStart ) ); + nStart = n; + } + } + r->allowLineBreak(); + r->characters( s.copy( nStart , n - nStart ) ); +} + + +void OSaxWriterTest::testSimple( const Reference< XExtendedDocumentHandler > &r ) +{ + OUString testParagraph = OUString( + "This is a stupid test to check whether the SAXWriter possibly makes " + "line breaks halfway correctly or whether it writes the line to the " + "bitter end." 
); + + OFileWriter *pw = new OFileWriter("output.xml"); + AttributeListImpl *pList = new AttributeListImpl; + + Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY ); + Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY ); + + Reference< XActiveDataSource > source( r , UNO_QUERY ); + + ERROR_ASSERT( ref.is() , "no output stream" ); + ERROR_ASSERT( source.is() , "no active data source" ); + + source->setOutputStream( ref ); + + r->startDocument(); + + pList->addAttribute( OUString( "Arg1" ), + OUString( "CDATA") , + OUString( "bla\n u") ); + pList->addAttribute( OUString( "Arg2") , + OUString( "CDATA") , + OUString( "blub") ); + + r->startElement( OUString( "tag1") , rList ); + r->ignorableWhitespace( OUString() ); + + r->characters( OUString( "huhu") ); + r->ignorableWhitespace( OUString() ); + + r->startElement( OUString( "hi") , rList ); + r->ignorableWhitespace( OUString() ); + + // the ampersand must be converted & -> & + r->characters( OUString( "ü") ); + + // Test added for mib. 
Tests if errors during conversions occurs + r->ignorableWhitespace( OUString() ); + char array[256]; + for( sal_Int32 n = 32 ; n < 254 ; n ++ ) { + array[n-32] = n; + } + array[254-32] = 0; + r->characters( + OStringToOUString( array , RTL_TEXTENCODING_SYMBOL ) + ); + r->ignorableWhitespace( OUString() ); + + // '>' must not be converted + r->startCDATA(); + r->characters( OUString( ">fsfsdf<") ); + r->endCDATA(); + r->ignorableWhitespace( OUString() ); + + writeParagraph( r , testParagraph ); + + + r->ignorableWhitespace( OUString() ); + r->comment( OUString( "This is a comment !") ); + r->ignorableWhitespace( OUString() ); + + r->startElement( OUString( "emptytagtest") , rList ); + r->endElement( OUString( "emptytagtest") ); + + r->endElement( OUString( "hi") ); + r->ignorableWhitespace( OUString() ); + + r->endElement( OUString( "tag1") ); + r->endDocument(); + +} + +void OSaxWriterTest::testExceptions( const Reference< XExtendedDocumentHandler > & r ) +{ + + OFileWriter *pw = new OFileWriter("output2.xml"); + AttributeListImpl *pList = new AttributeListImpl; + + Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY ); + Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY ); + + Reference< XActiveDataSource > source( r , UNO_QUERY ); + + ERROR_ASSERT( ref.is() , "no output stream" ); + ERROR_ASSERT( source.is() , "no active data source" ); + + source->setOutputStream( ref ); + + { // startDocument must be called before start element + sal_Bool bException = sal_True; + try + { + r->startElement( OUString( "huhu") , rList ); + bException = sal_False; + } + catch( SAXException &e ) + { + + } + ERROR_ASSERT( bException , "expected exception not thrown !" 
); + } + + r->startDocument(); + + r->startElement( OUString( "huhu") , rList ); + r->startCDATA(); + + { + sal_Bool bException = sal_True; + try{ + r->startElement( OUString( "huhu") , rList ); + bException = sal_False; + } + catch( SAXException &e ) { + + } + ERROR_ASSERT( bException , "expected exception not thrown !" ); + } + + r->endCDATA(); + + { + sal_Unicode array[] = { 'a' , 'b' , 4 , 9 , 10 }; + OUString o( array , 5 ); + try + { + r->characters( o ); + ERROR_ASSERT( 0 , "Writer allowed to write forbidden characters" ); + } + catch( SAXException & e ) + { + + } + } + r->endElement( OUString( "huhu") ); + + r->endDocument(); +} + + +void OSaxWriterTest::testDTD(const Reference< XExtendedDocumentHandler > &r ) +{ + OFileWriter *pw = new OFileWriter("outputDTD.xml"); + AttributeListImpl *pList = new AttributeListImpl; + + Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY ); + Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY ); + + Reference< XActiveDataSource > source( r , UNO_QUERY ); + + ERROR_ASSERT( ref.is() , "no output stream" ); + ERROR_ASSERT( source.is() , "no active data source" ); + + source->setOutputStream( ref ); + + + r->startDocument(); + r->unknown( OUString( "<!DOCTYPE iCalendar >\n") ); + r->startElement( OUString( "huhu") , rList ); + + r->endElement( OUString( "huhu") ); + r->endDocument(); +} + +void OSaxWriterTest::testPerformance(const Reference< XExtendedDocumentHandler > &r ) +{ + OFileWriter *pw = new OFileWriter("testPerformance.xml"); + AttributeListImpl *pList = new AttributeListImpl; + + OUString testParagraph = + OUString( + "This is a stupid test to check whether the SAXWriter possibly makes " + "line breaks halfway correctly or whether it writes the line to the " + "bitter end." 
); + + + Reference< XAttributeList > rList( (XAttributeList *) pList , UNO_QUERY ); + Reference< XOutputStream > ref( ( XOutputStream * ) pw , UNO_QUERY ); + + Reference< XActiveDataSource > source( r , UNO_QUERY ); + + ERROR_ASSERT( ref.is() , "no output stream" ); + ERROR_ASSERT( source.is() , "no active data source" ); + + source->setOutputStream( ref ); + + TimeValue aStartTime, aEndTime; + osl_getSystemTime( &aStartTime ); + + + r->startDocument(); + // just write a bunch of xml tags ! + // for performance testing + sal_Int32 i2; + OUString huhu( "huhu" ); + const int ITERATIONS = 125; + for( i2 = 0 ; i2 < ITERATIONS ; i2 ++ ) + { + r->startElement( OUString( "tag" ) + + OUString::valueOf( i2 ), rList ); + for( sal_Int32 i = 0 ; i < 450 ; i ++ ) + { + r->ignorableWhitespace( "" ); + r->startElement( huhu , rList ); + r->characters( testParagraph ); + + r->ignorableWhitespace( "" ); + r->endElement( huhu ); + } + } + for( i2 = ITERATIONS-1 ; i2 >= 0 ; i2-- ) + { + r->ignorableWhitespace( "" ); + r->endElement( OUString( "tag" ) + OUString::valueOf( i2 ) ); + } + + r->endDocument(); + + osl_getSystemTime( &aEndTime ); + + double fStart = (double)aStartTime.Seconds + ((double)aStartTime.Nanosec / 1000000000.0); + double fEnd = (double)aEndTime.Seconds + ((double)aEndTime.Nanosec / 1000000000.0); + + printf( "Performance writing : %g s\n" , fEnd - fStart ); +} +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/test/saxdemo.cxx b/sax/test/saxdemo.cxx new file mode 100644 index 0000000000..7139d60ef6 --- /dev/null +++ b/sax/test/saxdemo.cxx @@ -0,0 +1,626 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +// testcomponent - Loads a service and its testcomponent from dlls performs a test. +// Expands the dll-names depending on the actual environment. +// Example : testcomponent com.sun.star.io.Pipe stm + +// Therefore the testcode must exist in teststm and the testservice must be named test.com.sun.star.uno.io.Pipe + + +#include <stdio.h> +#include <vector> +#include <cstring> + +#include <com/sun/star/registry/XImplementationRegistration.hpp> +#include <com/sun/star/lang/XComponent.hpp> + +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp> + +#include <com/sun/star/io/XOutputStream.hpp> +#include <com/sun/star/io/XActiveDataSource.hpp> + +#include <cppuhelper/servicefactory.hxx> +#include <cppuhelper/implbase.hxx> + + +using namespace ::std; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::registry; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star::io; + + +/************ + * Sequence of bytes -> InputStream + ************/ +class OInputStream : public WeakImplHelper < XInputStream > +{ +public: + explicit OInputStream( const Sequence< sal_Int8 >&seq ) : + m_seq( seq ), + nPos( 0 ) + {} + +public: + virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead 
) + throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException) + { + nBytesToRead = (nBytesToRead > m_seq.getLength() - nPos ) ? + m_seq.getLength() - nPos : + nBytesToRead; + aData = Sequence< sal_Int8 > ( &(m_seq.getConstArray()[nPos]) , nBytesToRead ); + nPos += nBytesToRead; + return nBytesToRead; + } + virtual sal_Int32 SAL_CALL readSomeBytes( + css::uno::Sequence< sal_Int8 >& aData, + sal_Int32 nMaxBytesToRead ) + throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException) + { + return readBytes( aData, nMaxBytesToRead ); + } + virtual void SAL_CALL skipBytes( sal_Int32 /* nBytesToSkip */ ) + throw(NotConnectedException, BufferSizeExceededException, IOException, RuntimeException) + { + // not implemented + } + virtual sal_Int32 SAL_CALL available( ) + throw(NotConnectedException, IOException, RuntimeException) + { + return m_seq.getLength() - nPos; + } + virtual void SAL_CALL closeInput( ) + throw(NotConnectedException, IOException, RuntimeException) + { + // not needed + } + Sequence< sal_Int8> m_seq; + sal_Int32 nPos; +}; + + +// Helper : create an input stream from a file + +Reference< XInputStream > createStreamFromFile( + const char *pcFile ) +{ + FILE *f = fopen( pcFile , "rb" ); + Reference< XInputStream > r; + + if( f ) { + fseek( f , 0 , SEEK_END ); + int nLength = ftell( f ); + fseek( f , 0 , SEEK_SET ); + + Sequence<sal_Int8> seqIn(nLength); + fread( seqIn.getArray() , nLength , 1 , f ); + + r.set( new OInputStream( seqIn ) ); + fclose( f ); + } + return r; +} + + +// The document handler, which is needed for the saxparser +// The Documenthandler for reading sax + +class TestDocumentHandler : + public WeakImplHelper< XExtendedDocumentHandler , XEntityResolver , XErrorHandler > +{ +public: + TestDocumentHandler( ) + { + } + +public: // Error handler + virtual void SAL_CALL error(const Any& aSAXParseException) throw (SAXException, RuntimeException) + { + printf( "Error !\n" ); + throw 
SAXException( + OUString( "error from error handler") , + Reference < XInterface >() , + aSAXParseException ); + } + virtual void SAL_CALL fatalError(const Any& /* aSAXParseException */) throw (SAXException, RuntimeException) + { + printf( "Fatal Error !\n" ); + } + virtual void SAL_CALL warning(const Any& /* aSAXParseException */) throw (SAXException, RuntimeException) + { + printf( "Warning !\n" ); + } + + +public: // ExtendedDocumentHandler + + virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException) + { + m_iElementCount = 0; + m_iAttributeCount = 0; + m_iWhitespaceCount =0; + m_iCharCount=0; + printf( "document started\n" ); + } + virtual void SAL_CALL endDocument() throw (SAXException, RuntimeException) + { + printf( "document finished\n" ); + printf( "(ElementCount %d),(AttributeCount %d),(WhitespaceCount %d),(CharCount %d)\n", + m_iElementCount, m_iAttributeCount, m_iWhitespaceCount , m_iCharCount ); + + } + virtual void SAL_CALL startElement(const OUString& /* aName */, + const Reference< XAttributeList > & xAttribs) + throw (SAXException,RuntimeException) + { + m_iElementCount ++; + m_iAttributeCount += xAttribs->getLength(); + } + + virtual void SAL_CALL endElement(const OUString& /* aName */) throw (SAXException,RuntimeException) + { + // ignored + } + + virtual void SAL_CALL characters(const OUString& aChars) throw (SAXException,RuntimeException) + { + m_iCharCount += aChars.getLength(); + } + virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) throw (SAXException,RuntimeException) + { + m_iWhitespaceCount += aWhitespaces.getLength(); + } + + virtual void SAL_CALL processingInstruction(const OUString& /* aTarget */, const OUString& /* aData */) throw (SAXException,RuntimeException) + { + // ignored + } + + virtual void SAL_CALL setDocumentLocator(const Reference< XLocator> & /* xLocator */) + throw (SAXException,RuntimeException) + { + // ignored + } + + virtual InputSource SAL_CALL resolveEntity( + const 
OUString& sPublicId, + const OUString& sSystemId) + throw (RuntimeException) + { + InputSource source; + source.sSystemId = sSystemId; + source.sPublicId = sPublicId; + + source.aInputStream = createStreamFromFile( + OUStringToOString( sSystemId, RTL_TEXTENCODING_ASCII_US).getStr() ); + + return source; + } + + virtual void SAL_CALL startCDATA() throw (SAXException,RuntimeException) + { + } + virtual void SAL_CALL endCDATA() throw (SAXException,RuntimeException) + { + } + virtual void SAL_CALL comment(const OUString& /* sComment */) throw (SAXException,RuntimeException) + { + } + virtual void SAL_CALL unknown(const OUString& /* sString */) throw (SAXException,RuntimeException) + { + } + + virtual void SAL_CALL allowLineBreak() throw (SAXException, RuntimeException ) + { + + } + +public: + int m_iElementCount; + int m_iAttributeCount; + int m_iWhitespaceCount; + int m_iCharCount; +}; + + +// helper implementation for writing +// implements an XAttributeList + +struct AttributeListImpl_impl; +class AttributeListImpl : public WeakImplHelper< XAttributeList > +{ +public: + AttributeListImpl(); + AttributeListImpl( const AttributeListImpl & ); + ~AttributeListImpl(); + +public: + virtual sal_Int16 SAL_CALL getLength() throw (RuntimeException); + virtual OUString SAL_CALL getNameByIndex(sal_Int16 i) throw (RuntimeException); + virtual OUString SAL_CALL getTypeByIndex(sal_Int16 i) throw (RuntimeException); + virtual OUString SAL_CALL getTypeByName(const OUString& aName) throw (RuntimeException); + virtual OUString SAL_CALL getValueByIndex(sal_Int16 i) throw (RuntimeException); + virtual OUString SAL_CALL getValueByName(const OUString& aName) throw (RuntimeException); + +public: + void addAttribute( const OUString &sName , + const OUString &sType , + const OUString &sValue ); + void clear(); + +private: + struct AttributeListImpl_impl *m_pImpl; +}; + + +struct TagAttribute +{ + TagAttribute(){} + TagAttribute( const OUString &s_Name, + const OUString &s_Type , + const 
OUString &s_Value ) + : sName(s_Name), + sType(s_Type), + sValue(s_Value) + { + } + + OUString sName; + OUString sType; + OUString sValue; +}; + +struct AttributeListImpl_impl +{ + AttributeListImpl_impl() + { + // performance improvement during adding + vecAttribute.reserve(20); + } + vector<struct TagAttribute> vecAttribute; +}; + + +sal_Int16 AttributeListImpl::getLength() throw (RuntimeException) +{ + return (sal_Int16) m_pImpl->vecAttribute.size(); +} + + +AttributeListImpl::AttributeListImpl( const AttributeListImpl &r ) +{ + m_pImpl = new AttributeListImpl_impl; + *m_pImpl = *(r.m_pImpl); +} + +OUString AttributeListImpl::getNameByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( i < sal::static_int_cast<sal_Int16>(m_pImpl->vecAttribute.size()) ) { + return m_pImpl->vecAttribute[i].sName; + } + return OUString(); +} + + +OUString AttributeListImpl::getTypeByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( i < sal::static_int_cast<sal_Int16>(m_pImpl->vecAttribute.size()) ) { + return m_pImpl->vecAttribute[i].sType; + } + return OUString(); +} + +OUString AttributeListImpl::getValueByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( i < sal::static_int_cast<sal_Int16>(m_pImpl->vecAttribute.size()) ) { + return m_pImpl->vecAttribute[i].sValue; + } + return OUString(); + +} + +OUString AttributeListImpl::getTypeByName( const OUString& sName ) throw (RuntimeException) +{ + auto ii = std::find_if(m_pImpl->vecAttribute.begin(), m_pImpl->vecAttribute.end(), + [&sName](const struct TagAttribute& rAttr) { return rAttr.sName == sName; }); + if (ii != m_pImpl->vecAttribute.end()) + return (*ii).sType; + return OUString(); +} + +OUString AttributeListImpl::getValueByName(const OUString& sName) throw (RuntimeException) +{ + auto ii = std::find_if(m_pImpl->vecAttribute.begin(), m_pImpl->vecAttribute.end(), + [&sName](const struct TagAttribute& rAttr) { return rAttr.sName == sName; }); + if (ii != m_pImpl->vecAttribute.end()) + return (*ii).sValue; + return 
OUString(); +} + + +AttributeListImpl::AttributeListImpl() +{ + m_pImpl = new AttributeListImpl_impl; +} + + +AttributeListImpl::~AttributeListImpl() +{ + delete m_pImpl; +} + + +void AttributeListImpl::addAttribute( const OUString &sName , + const OUString &sType , + const OUString &sValue ) +{ + m_pImpl->vecAttribute.push_back( TagAttribute( sName , sType , sValue ) ); +} + +void AttributeListImpl::clear() +{ + m_pImpl->vecAttribute.clear(); +} + + +// helper function for writing +// ensures that linebreaks are inserted +// when writing a long text. +// Note: this implementation may be a bit slow, +// but it shows, how the SAX-Writer handles the allowLineBreak calls. + +void writeParagraphHelper( + const Reference< XExtendedDocumentHandler > &r , + const OUString & s) +{ + int nMax = s.getLength(); + int nStart = 0; + int n = 1; + + Sequence<sal_uInt16> seq( s.getLength() ); + memcpy( seq.getArray() , s.getStr() , s.getLength() * sizeof( sal_uInt16 ) ); + + for( n = 1 ; n < nMax ; n++ ){ + if( 32 == seq.getArray()[n] ) { + r->allowLineBreak(); + r->characters( s.copy( nStart , n - nStart ) ); + nStart = n; + } + } + r->allowLineBreak(); + r->characters( s.copy( nStart , n - nStart ) ); +} + + +// helper implementation for SAX-Writer +// writes data to a file + +class OFileWriter : + public WeakImplHelper< XOutputStream > +{ +public: + explicit OFileWriter( char *pcFile ) { strncpy( m_pcFile , pcFile, 256 - 1 ); m_f = 0; } + + +public: + virtual void SAL_CALL writeBytes(const Sequence< sal_Int8 >& aData) + throw (NotConnectedException, BufferSizeExceededException, RuntimeException); + virtual void SAL_CALL flush() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException); + virtual void SAL_CALL closeOutput() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException); +private: + char m_pcFile[256]; + FILE *m_f; +}; + + +void OFileWriter::writeBytes(const Sequence< sal_Int8 >& aData) + throw (NotConnectedException, 
BufferSizeExceededException, RuntimeException) +{ + if( ! m_f ) { + m_f = fopen( m_pcFile , "w" ); + } + + fwrite( aData.getConstArray() , 1 , aData.getLength() , m_f ); +} + + +void OFileWriter::flush() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException) +{ + fflush( m_f ); +} + +void OFileWriter::closeOutput() + throw (NotConnectedException, BufferSizeExceededException, RuntimeException) +{ + fclose( m_f ); + m_f = 0; +} + + +// Needed to switch on solaris threads +#ifdef __sun +extern "C" void ChangeGlobalInit(); +#endif +int main (int argc, char **argv) +{ + + if( argc < 3) { + printf( "usage : saxdemo inputfile outputfile\n" ); + exit( 0 ); + } +#ifdef __sun + // switch on threads in solaris + ChangeGlobalInit(); +#endif + + // create service manager + Reference< XMultiServiceFactory > xSMgr = createRegistryServiceFactory( + OUString( "applicat.rdb" ) ); + + Reference < XImplementationRegistration > xReg; + try + { + // Create registration service + Reference < XInterface > x = xSMgr->createInstance( "com.sun.star.registry.ImplementationRegistration" ); + xReg.set( x , UNO_QUERY ); + } + catch( Exception & ) { + printf( "Couldn't create ImplementationRegistration service\n" ); + exit(1); + } + + OString sTestName; + try + { + // Load dll for the tested component + OUString aDllName( "sax.uno" SAL_DLLEXTENSION ); + xReg->registerImplementation( + OUString("com.sun.star.loader.SharedLibrary"), + aDllName, + Reference< XSimpleRegistry > () ); + } + catch( Exception &e ) { + printf( "Couldn't reach sax dll\n" ); + printf( "%s\n" , OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US ).getStr() ); + + exit(1); + } + + + // parser demo + // read xml from a file and count elements + + Reference< XInterface > x = xSMgr->createInstance( "com.sun.star.xml.sax.Parser" ); + if( x.is() ) + { + Reference< XParser > rParser( x , UNO_QUERY ); + + // create and connect the document handler to the parser + TestDocumentHandler *pDocHandler = new 
TestDocumentHandler( ); + + Reference < XDocumentHandler > rDocHandler( (XDocumentHandler *) pDocHandler ); + Reference< XEntityResolver > rEntityResolver( (XEntityResolver *) pDocHandler ); + + rParser->setDocumentHandler( rDocHandler ); + rParser->setEntityResolver( rEntityResolver ); + + // create the input stream + InputSource source; + source.aInputStream = createStreamFromFile( argv[1] ); + source.sSystemId = OUString::createFromAscii( argv[1] ); + + try + { + // start parsing + rParser->parseStream( source ); + } + + catch( Exception & e ) + { + OString o1 = OUStringToOString(e.Message, RTL_TEXTENCODING_UTF8 ); + printf( "Exception during parsing : %s\n" , o1.getStr() ); + } + } + else + { + printf( "couldn't create sax-parser component\n" ); + } + + + // The SAX-Writer demo + + x= xSMgr->createInstance("com.sun.star.xml.sax.Writer"); + if( x.is() ) + { + printf( "start writing to %s\n" , argv[2] ); + + OFileWriter *pw = new OFileWriter( argv[2] ); + Reference< XActiveDataSource > source( x , UNO_QUERY ); + source->setOutputStream( Reference< XOutputStream> ( (XOutputStream*) pw ) ); + + AttributeListImpl *pList = new AttributeListImpl; + Reference< XAttributeList > rList( (XAttributeList *) pList ); + + Reference< XExtendedDocumentHandler > r( x , UNO_QUERY ); + r->startDocument(); + + pList->addAttribute( OUString( "Arg1" ), + OUString( "CDATA") , + OUString( "foo\n u") ); + pList->addAttribute( OUString( "Arg2") , + OUString( "CDATA") , + OUString( "foo2") ); + + r->startElement( OUString( "tag1") , rList ); + // tells the writer to insert a linefeed + r->ignorableWhitespace( OUString() ); + + r->characters( OUString( "huhu") ); + r->ignorableWhitespace( OUString() ); + + r->startElement( OUString( "hi") , rList ); + r->ignorableWhitespace( OUString() ); + + // the enpassant must be converted & -> & + r->characters( OUString( "ü") ); + r->ignorableWhitespace( OUString() ); + + // '>' must not be converted + r->startCDATA(); + r->characters( OUString( " > 
foo < ") ); + r->endCDATA(); + r->ignorableWhitespace( OUString() ); + + OUString testParagraph = OUString( + "This is only a test to check, if the writer inserts line feeds " + "if needed or if the writer puts the whole text into one line." ); + writeParagraphHelper( r , testParagraph ); + + r->ignorableWhitespace( OUString() ); + r->comment( OUString( "This is a comment !") ); + r->ignorableWhitespace( OUString() ); + + r->startElement( OUString( "emptytagtest") , rList ); + r->endElement( OUString( "emptytagtest") ); + r->ignorableWhitespace( OUString() ); + + r->endElement( OUString( "hi") ); + r->ignorableWhitespace( OUString() ); + + r->endElement( OUString( "tag1") ); + r->endDocument(); + + printf( "finished writing\n" ); + } + else + { + printf( "couldn't create sax-writer component\n" ); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sax/test/testcomponent.cxx b/sax/test/testcomponent.cxx new file mode 100644 index 0000000000..51f8f244e0 --- /dev/null +++ b/sax/test/testcomponent.cxx @@ -0,0 +1,223 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +// testcomponent - Loads a service and its testcomponent from dlls performs a test. +// Expands the dll-names depending on the actual environment. +// Example : testcomponent com.sun.star.io.Pipe stm + +// Therefore the testcode must exist in teststm and the testservice must be named com.sun.star.io.Pipe + + +#include <stdio.h> +#include <com/sun/star/registry/XImplementationRegistration.hpp> +#include <com/sun/star/lang/XComponent.hpp> + +#include <com/sun/star/test/XSimpleTest.hpp> + +#include <cppuhelper/servicefactory.hxx> + + +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::test; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::registry; + +// Needed to switch on solaris threads +#ifdef __sun +extern "C" void ChangeGlobalInit(); +#endif + +int main (int argc, char **argv) +{ + + if( argc < 3) { + printf( "usage : testcomponent service dll [additional dlls]\n" ); + exit( 0 ); + } +#ifdef __sun + // switch on threads in solaris + ChangeGlobalInit(); +#endif + + // create service manager + Reference< XMultiServiceFactory > xSMgr = + createRegistryServiceFactory( OUString( "applicat.rdb") ); + + Reference < XImplementationRegistration > xReg; + Reference < XSimpleRegistry > xSimpleReg; + + try + { + // Create registration service + Reference < XInterface > x = xSMgr->createInstance( "com.sun.star.registry.ImplementationRegistration" ); + xReg.set( x , UNO_QUERY ); + } + catch (const Exception&) + { + printf( "Couldn't create ImplementationRegistration service\n" ); + exit(1); + } + + char szBuf[1024]; + OString sTestName; + + try + { + // Load dll for the tested component + for( int n = 2 ; n <argc ; n ++ ) { +#ifdef _WIN32 + OUString aDllName = OStringToOUString( argv[n] , RTL_TEXTENCODING_ASCII_US ); +#else + OUString aDllName = "lib"; + aDllName += OStringToOUString( argv[n] , RTL_TEXTENCODING_ASCII_US ); + aDllName += ".so"; +#endif + xReg->registerImplementation( + 
OUString("com.sun.star.loader.SharedLibrary"), + aDllName, + xSimpleReg ); + } + } + catch (const Exception &e) + { + printf( "Couldn't reach dll %s\n" , szBuf ); + printf( "%s\n" , OUStringToOString( e.Message , RTL_TEXTENCODING_ASCII_US ).getStr() ); + + exit(1); + } + + + try + { + // Load dll for the test component + sTestName = "test"; + sTestName += argv[2]; + +#ifdef _WIN32 + OUString aDllName = OStringToOUString( sTestName , RTL_TEXTENCODING_ASCII_US ); +#else + OUString aDllName = "lib"; + aDllName += OStringToOUString( sTestName , RTL_TEXTENCODING_ASCII_US ); + aDllName += ".so"; +#endif + + xReg->registerImplementation( + OUString("com.sun.star.loader.SharedLibrary") , + aDllName, + xSimpleReg ); + } + catch (const Exception&) + { + printf( "Couldn't reach dll %s\n" , szBuf ); + exit(1); + } + + + // Instantiate test service + sTestName = "test."; + sTestName += argv[1]; + + Reference < XInterface > xIntTest = + xSMgr->createInstance( OStringToOUString( sTestName , RTL_TEXTENCODING_ASCII_US ) ); + Reference< XSimpleTest > xTest( xIntTest , UNO_QUERY ); + + if( ! xTest.is() ) { + printf( "Couldn't instantiate test service \n" ); + exit( 1 ); + } + + + sal_Int32 nHandle = 0; + sal_Int32 nNewHandle; + sal_Int32 nErrorCount = 0; + sal_Int32 nWarningCount = 0; + + // loop until all test are performed + while( nHandle != -1 ) + { + // Instantiate service + Reference< XInterface > x = + xSMgr->createInstance( OStringToOUString( argv[1] , RTL_TEXTENCODING_ASCII_US ) ); + if( ! x.is() ) + { + printf( "Couldn't instantiate service !\n" ); + exit( 1 ); + } + + // do the test + try + { + nNewHandle = xTest->test( + OStringToOUString( argv[1] , RTL_TEXTENCODING_ASCII_US ) , x , nHandle ); + } + catch (const Exception &e) + { + OString o = OUStringToOString( e.Message, RTL_TEXTENCODING_ASCII_US ); + printf( "testcomponent : uncaught exception %s\n" , o.getStr() ); + exit(1); + } + catch (...) 
+ { + printf( "testcomponent : uncaught unknown exception\n" ); + exit(1); + } + + + // print errors and warning + Sequence<OUString> seqErrors = xTest->getErrors(); + Sequence<OUString> seqWarnings = xTest->getWarnings(); + if( seqWarnings.getLength() > nWarningCount ) + { + printf( "Warnings during test %" SAL_PRIxUINT32 "!\n" , nHandle ); + for( ; nWarningCount < seqWarnings.getLength() ; nWarningCount ++ ) + { + OString o = OUStringToOString( + seqWarnings.getArray()[nWarningCount], RTL_TEXTENCODING_ASCII_US ); + printf( "Warning\n%s\n---------\n" , o.getStr() ); + } + } + + + if( seqErrors.getLength() > nErrorCount ) { + printf( "Errors during test %" SAL_PRIxUINT32 "!\n" , nHandle ); + for( ; nErrorCount < seqErrors.getLength() ; nErrorCount ++ ) { + OString o = OUStringToOString( + seqErrors.getArray()[nErrorCount], RTL_TEXTENCODING_ASCII_US ); + printf( "%s\n" , o.getStr() ); + } + } + + nHandle = nNewHandle; + } + + if( xTest->testPassed() ) { + printf( "Test passed !\n" ); + } + else { + printf( "Test failed !\n" ); + } + + Reference <XComponent > rComp( xSMgr , UNO_QUERY ); + rComp->dispose(); + return 0; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |