From 267c6f2ac71f92999e969232431ba04678e7437e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 07:54:39 +0200 Subject: Adding upstream version 4:24.2.0. Signed-off-by: Daniel Baumann --- include/oox/core/binarycodec.hxx | 42 +++++ include/oox/core/contexthandler.hxx | 115 ++++++++++++++ include/oox/core/contexthandler2.hxx | 291 ++++++++++++++++++++++++++++++++++ include/oox/core/fastparser.hxx | 118 ++++++++++++++ include/oox/core/fasttokenhandler.hxx | 68 ++++++++ include/oox/core/filterbase.hxx | 280 ++++++++++++++++++++++++++++++++ include/oox/core/filterdetect.hxx | 172 ++++++++++++++++++++ include/oox/core/fragmenthandler.hxx | 139 ++++++++++++++++ include/oox/core/fragmenthandler2.hxx | 115 ++++++++++++++ include/oox/core/recordparser.hxx | 88 ++++++++++ include/oox/core/relations.hxx | 120 ++++++++++++++ include/oox/core/relationshandler.hxx | 58 +++++++ include/oox/core/xmlfilterbase.hxx | 290 +++++++++++++++++++++++++++++++++ 13 files changed, 1896 insertions(+) create mode 100644 include/oox/core/binarycodec.hxx create mode 100644 include/oox/core/contexthandler.hxx create mode 100644 include/oox/core/contexthandler2.hxx create mode 100644 include/oox/core/fastparser.hxx create mode 100644 include/oox/core/fasttokenhandler.hxx create mode 100644 include/oox/core/filterbase.hxx create mode 100644 include/oox/core/filterdetect.hxx create mode 100644 include/oox/core/fragmenthandler.hxx create mode 100644 include/oox/core/fragmenthandler2.hxx create mode 100644 include/oox/core/recordparser.hxx create mode 100644 include/oox/core/relations.hxx create mode 100644 include/oox/core/relationshandler.hxx create mode 100644 include/oox/core/xmlfilterbase.hxx (limited to 'include/oox/core') diff --git a/include/oox/core/binarycodec.hxx b/include/oox/core/binarycodec.hxx new file mode 100644 index 0000000000..e7bfbaaa43 --- /dev/null +++ b/include/oox/core/binarycodec.hxx @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; 
c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_BINARYCODEC_HXX +#define INCLUDED_OOX_CORE_BINARYCODEC_HXX + +#include + +#include +#include + +namespace com::sun::star { + namespace beans { struct NamedValue; } +} + +namespace oox { class AttributeList; } + +namespace oox::core::CodecHelper +{ + /** Returns the password hash if it is in the required 16-bit limit. */ + OOX_DLLPUBLIC sal_uInt16 getPasswordHash( const AttributeList& rAttribs, sal_Int32 nElement ); +} // namespace oox::core::CodecHelper + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/contexthandler.hxx b/include/oox/core/contexthandler.hxx new file mode 100644 index 0000000000..89bdb2b9af --- /dev/null +++ b/include/oox/core/contexthandler.hxx @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_CONTEXTHANDLER_HXX +#define INCLUDED_OOX_CORE_CONTEXTHANDLER_HXX + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace com::sun::star { + namespace xml::sax { class XFastAttributeList; } + namespace xml::sax { class XLocator; } +} + +namespace oox { class SequenceInputStream; } + +namespace oox::core { + +class XmlFilterBase; +struct Relation; +class Relations; + +class ContextHandler; +typedef ::rtl::Reference< ContextHandler > ContextHandlerRef; + +struct FragmentBaseData; +typedef std::shared_ptr< FragmentBaseData > FragmentBaseDataRef; + +typedef ::cppu::WeakImplHelper< css::xml::sax::XFastContextHandler > ContextHandler_BASE; + +class OOX_DLLPUBLIC ContextHandler : public ContextHandler_BASE +{ +public: + explicit ContextHandler( const ContextHandler& rParent ); + virtual ~ContextHandler() override; + + /** Returns the filter instance. */ + XmlFilterBase& getFilter() const; + /** Returns the relations of the current fragment. */ + const Relations& getRelations() const; + /** Returns the full path of the current fragment. */ + const OUString& getFragmentPath() const; + + /** Returns the full fragment path for the target of the passed relation. */ + OUString getFragmentPathFromRelation( const Relation& rRelation ) const; + /** Returns the full fragment path for the passed relation identifier. 
*/ + OUString getFragmentPathFromRelId( const OUString& rRelId ) const; + /** Returns the full fragment path for the first relation of the passed type. */ + OUString getFragmentPathFromFirstType( std::u16string_view rType ) const; + OUString getFragmentPathFromFirstTypeFromOfficeDoc( std::u16string_view rType ) const; + + // com.sun.star.xml.sax.XFastContextHandler interface --------------------- + + virtual void SAL_CALL startFastElement( ::sal_Int32 Element, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL startUnknownElement( const OUString& Namespace, const OUString& Name, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL endFastElement( ::sal_Int32 Element ) override; + virtual void SAL_CALL endUnknownElement( const OUString& Namespace, const OUString& Name ) override; + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL createFastChildContext( ::sal_Int32 Element, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL createUnknownChildContext( const OUString& Namespace, const OUString& Name, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL characters( const OUString& aChars ) override; + + // record context interface ----------------------------------------------- + + virtual ContextHandlerRef createRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ); + virtual void startRecord( sal_Int32 nRecId, SequenceInputStream& rStrm ); + virtual void endRecord( sal_Int32 nRecId ); + +protected: + /** Helper constructor for the FragmentHandler. 
*/ + explicit ContextHandler( FragmentBaseDataRef rxBaseData ); + + void implSetLocator( const css::uno::Reference< css::xml::sax::XLocator >& rxLocator ); + +#ifdef _MSC_VER + ContextHandler() {} // workaround +#endif + +private: + ContextHandler& operator=( const ContextHandler& ) = delete; + +private: + FragmentBaseDataRef mxBaseData; ///< Base data of the fragment. +}; + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/contexthandler2.hxx b/include/oox/core/contexthandler2.hxx new file mode 100644 index 0000000000..1414db7f24 --- /dev/null +++ b/include/oox/core/contexthandler2.hxx @@ -0,0 +1,291 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_OOX_CORE_CONTEXTHANDLER2_HXX +#define INCLUDED_OOX_CORE_CONTEXTHANDLER2_HXX + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace com::sun::star { + namespace xml::sax { class XFastAttributeList; } + namespace xml::sax { class XFastContextHandler; } +} + +namespace oox { + class AttributeList; + class SequenceInputStream; +} + +namespace oox::core { + +const sal_Int32 XML_ROOT_CONTEXT = SAL_MAX_INT32; + +struct ElementInfo; + +/** Helper class that provides a context stack. + + Fragment handlers and context handlers derived from this helper class will + track the identifiers of the visited elements in a stack. The idea is to + use the same instance of a fragment handler or context handler to process + several nested elements in an XML stream. For that, the abstract function + onCreateContext() has to return 'this' for the passed element. + + Derived classes have to implement the createFastChildContext(), + startFastElement(), characters(), and endFastElement() functions from the + com.sun.star.xml.sax.XFastContextHandler interface by simply forwarding + them to the respective implCreateChildContext(), implStartElement(), + implCharacters(), and implEndElement() functions of this helper. This is + implemented already in the classes ContextHandler2 and FragmentHandler2. + The new abstract functions have to be implemented according to the elements + to be processed. + + Similarly, for binary import, derived classes have to forward the + createRecordContext(), startRecord(), and endRecord() functions from the + ContextHandler class to the implCreateRecordContext(), implStartRecord(), + and implEndRecord() functions of this helper. Again, this is implemented + already in the classes ContextHandler2 and FragmentHandler2. 
+ */ +class OOX_DLLPUBLIC SAL_LOPLUGIN_ANNOTATE("crosscast") ContextHandler2Helper +{ +public: + explicit ContextHandler2Helper( bool bEnableTrimSpace, XmlFilterBase& rFilter ); + explicit ContextHandler2Helper( const ContextHandler2Helper& rParent ); + virtual ~ContextHandler2Helper(); + + // allow instances to be stored in ::rtl::Reference + virtual void SAL_CALL acquire() noexcept = 0; + virtual void SAL_CALL release() noexcept = 0; + + // interface -------------------------------------------------------------- + + /** Will be called to create a context handler for the passed element. + + Usually 'this' can be returned to improve performance by reusing the + same instance to process several elements. Used by OOXML import only. + */ + virtual ContextHandlerRef onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs ) = 0; + + /** Will be called when a new element has been started. + + This function is called at the context handler returned from + onCreateContext(), or, for root elements of an XML stream, at the + fragment handler itself. + + The current element identifier can be accessed with getCurrentElement() + or isCurrentElement(). Used by OOXML import only. + */ + virtual void onStartElement( const AttributeList& rAttribs ) = 0; + + /** Will be called before a new child element starts, or if the current + element is about to be left. + + This helper function collects all text fragments received by the + characters() function (such as encoded characters which are passed in + separate calls to the characters() function), and passes the + concatenated and trimmed string. + + The current element identifier can be accessed with getCurrentElement() + or isCurrentElement(). Used by OOXML import only. + */ + virtual void onCharacters( const OUString& rChars ) = 0; + + /** Will be called when the current element is about to be left. + + The current element identifier can be accessed with getCurrentElement() + or isCurrentElement(). 
Used by OOXML import only. + */ + virtual void onEndElement() = 0; + + /** Will be called to create a context handler for the passed record. + + Usually 'this' can be returned to improve performance by reusing the + same instance to process several records. Used by BIFF import only. + */ + virtual ContextHandlerRef onCreateRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ) = 0; + + /** Will be called when a new record block in a binary stream has been + started. + + The current record identifier can be accessed with getCurrentElement() + or isCurrentElement(). Used by BIFF import only. + */ + virtual void onStartRecord( SequenceInputStream& rStrm ) = 0; + + /** Will be called when the current record block is about to be left. + + The current record identifier can be accessed with getCurrentElement() + or isCurrentElement(). Used by BIFF import only. + */ + virtual void onEndRecord() = 0; + + // helpers ---------------------------------------------------------------- + + /** Returns the identifier of the currently processed element. Ignores MCE elements in stack */ + sal_Int32 getCurrentElement() const; + + /** Returns the identifier of the currently processed element - Including MCE root elements */ + sal_Int32 getCurrentElementWithMce() const; + + /** Returns true, if nElement contains the identifier of the currently + processed element. */ + bool isCurrentElement( sal_Int32 nElement ) const + { return getCurrentElement() == nElement; } + + /** Returns true, if either nElement1 or nElement2 contain the identifier + of the currently processed element. */ + bool isCurrentElement( sal_Int32 nElement1, sal_Int32 nElement2 ) const + { return isCurrentElement( nElement1 ) || isCurrentElement( nElement2 ); } + + /** Returns the identifier of the specified parent element. */ + sal_Int32 getParentElement( sal_Int32 nCountBack = 1 ) const; + + /** Returns true, if nElement contains the identifier of the specified + parent element. 
*/ + bool isParentElement( sal_Int32 nElement, sal_Int32 nCountBack = 1 ) const + { return getParentElement( nCountBack ) == nElement; } + + /** Returns true, if the element currently processed is the root element of + the context or fragment handler. */ + bool isRootElement() const; + + // implementation --------------------------------------------------------- + +protected: + /** Must be called from createFastChildContext() in derived classes. */ + css::uno::Reference< css::xml::sax::XFastContextHandler > + implCreateChildContext( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ); + + /** Must be called from startFastElement() in derived classes. */ + void implStartElement( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ); + + /** Must be called from characters() in derived classes. */ + void implCharacters( std::u16string_view rChars ); + + /** Must be called from endFastElement() in derived classes. */ + void implEndElement( sal_Int32 nElement ); + + /** Must be called from createRecordContext() in derived classes. */ + ContextHandlerRef implCreateRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ); + + /** Must be called from startRecord() in derived classes. */ + void implStartRecord( sal_Int32 nRecId, SequenceInputStream& rStrm ); + + /** Must be called from endRecord() in derived classes. 
*/
+    void implEndRecord( sal_Int32 nRecId );
+
+    bool prepareMceContext( sal_Int32 nElement, const AttributeList& rAttribs );
+    XmlFilterBase& getDocFilter() const { return mrFilter; }
+
+    enum class MCE_STATE
+    {
+        Started,
+        FoundChoice
+    };
+
+    MCE_STATE getMCEState() const { return aMceState.back(); } ///< Innermost state; requires !isMCEStateEmpty().
+    void setMCEState( MCE_STATE aState ) { aMceState.back() = aState; } ///< Overwrites the innermost state; requires !isMCEStateEmpty().
+    void addMCEState( MCE_STATE aState ) { aMceState.push_back( aState ); } ///< Pushes a new innermost state.
+    void removeMCEState() { aMceState.pop_back(); } ///< Pops the innermost state; requires !isMCEStateEmpty().
+    bool isMCEStateEmpty() const { return aMceState.empty(); } ///< True if the state stack is empty.
+
+private:
+    ContextHandler2Helper& operator=( const ContextHandler2Helper& ) = delete;
+
+    ElementInfo& pushElementInfo( sal_Int32 nElement );
+    void popElementInfo();
+    void processCollectedChars();
+
+private:
+    typedef std::vector< ElementInfo > ContextStack;
+    typedef std::shared_ptr< ContextStack > ContextStackRef;
+
+    ContextStackRef mxContextStack; ///< Stack of all processed elements.
+    size_t mnRootStackSize; ///< Stack size on construction time.
+    std::vector< MCE_STATE > aMceState; ///< Stack of MCE states; back() is the current one.
+
+protected:
+    bool mbEnableTrimSpace; ///< True = trim whitespace in characters().
+ XmlFilterBase& mrFilter; +}; + +class OOX_DLLPUBLIC ContextHandler2 : public ContextHandler, public ContextHandler2Helper +{ +public: + explicit ContextHandler2( ContextHandler2Helper const & rParent ); + virtual ~ContextHandler2() override; + + ContextHandler2(ContextHandler2 const &) = default; + ContextHandler2(ContextHandler2 &&) = default; + ContextHandler2 & operator =(ContextHandler2 const &) = delete; // due to ContextHandler + ContextHandler2 & operator =(ContextHandler2 &&) = delete; // due to ContextHandler + + // resolve ambiguity from base classes + virtual void SAL_CALL acquire() noexcept override { ContextHandler::acquire(); } + virtual void SAL_CALL release() noexcept override { ContextHandler::release(); } + + // com.sun.star.xml.sax.XFastContextHandler interface --------------------- + + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL + createFastChildContext( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ) final override; + + virtual void SAL_CALL startFastElement( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ) final override; + + virtual void SAL_CALL characters( const OUString& rChars ) final override; + + virtual void SAL_CALL endFastElement( sal_Int32 nElement ) final override; + + // oox.core.ContextHandler interface -------------------------------------- + + virtual ContextHandlerRef createRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ) override; + virtual void startRecord( sal_Int32 nRecId, SequenceInputStream& rStrm ) override; + virtual void endRecord( sal_Int32 nRecId ) override; + + // oox.core.ContextHandler2Helper interface ------------------------------- + + virtual ContextHandlerRef onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs ) override; + virtual void onStartElement( const AttributeList& rAttribs ) override; + virtual void onCharacters( const OUString& rChars ) 
override; + virtual void onEndElement() override; + + virtual ContextHandlerRef onCreateRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ) override; + virtual void onStartRecord( SequenceInputStream& rStrm ) override; + virtual void onEndRecord() override; +}; + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/fastparser.hxx b/include/oox/core/fastparser.hxx new file mode 100644 index 0000000000..058bd344b5 --- /dev/null +++ b/include/oox/core/fastparser.hxx @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */
+
+#ifndef INCLUDED_OOX_CORE_FASTPARSER_HXX
+#define INCLUDED_OOX_CORE_FASTPARSER_HXX
+
+#include <com/sun/star/uno/Reference.hxx>
+#include <oox/dllapi.h>
+#include <rtl/ref.hxx>
+#include <rtl/ustring.hxx>
+#include <sal/types.h>
+
+namespace com::sun::star {
+    namespace io { class XInputStream; }
+    namespace uno { class XComponentContext; }
+    namespace xml::sax { class XFastDocumentHandler; }
+    namespace xml::sax { class XFastParser; }
+    namespace xml::sax { class XFastTokenHandler; }
+    namespace xml::sax { struct InputSource; }
+}
+
+namespace oox {
+    struct NamespaceMap;
+    class StorageBase;
+}
+
+namespace sax_fastparser {
+    class FastSaxParser;
+}
+
+namespace oox::core {
+
+
+/** Wrapper for a fast SAX parser that works on automatically generated OOXML
+    token and namespace identifiers.
+ */
+class OOX_DLLPUBLIC FastParser
+{
+public:
+    /// @throws css::uno::RuntimeException
+    explicit FastParser();
+
+    FastParser(const FastParser&) = delete;
+    FastParser& operator=(const FastParser&) = delete;
+
+    ~FastParser();
+
+    /** Registers an OOXML namespace at the parser.
+        @throws css::lang::IllegalArgumentException
+        @throws css::uno::RuntimeException
+     */
+    void registerNamespace( sal_Int32 nNamespaceId );
+
+    /** Sets the passed document handler that will receive the SAX parser events.
+        @throws css::uno::RuntimeException
+     */
+    void setDocumentHandler(
+        const css::uno::Reference< css::xml::sax::XFastDocumentHandler >& rxDocHandler );
+
+    void clearDocumentHandler();
+
+    /** Parses the passed SAX input source.
+        @param bCloseStream  True = closes the stream in the input source after parsing.
+        @throws css::xml::sax::SAXException
+        @throws css::io::IOException
+        @throws css::uno::RuntimeException
+     */
+    void parseStream( const css::xml::sax::InputSource& rInputSource, bool bCloseStream = false );
+
+    /** Parses the passed input stream.
+        @param rxInStream  The input stream to be parsed.
+        @throws css::xml::sax::SAXException
+        @throws css::io::IOException
+        @throws css::uno::RuntimeException
+     */
+    void parseStream(
+        const css::uno::Reference< css::io::XInputStream >& rxInStream,
+        const OUString& rStreamName );
+
+    /** Parses a stream from the passed storage with the specified name.
+        @param rStreamName  Name of the stream in rStorage that is to be parsed.
+        @throws css::xml::sax::SAXException
+        @throws css::io::IOException
+        @throws css::uno::RuntimeException
+     */
+    void parseStream( StorageBase& rStorage, const OUString& rStreamName );
+
+    const css::uno::Reference< css::xml::sax::XFastTokenHandler >&
+        getTokenHandler() const { return mxTokenHandler; }
+
+private:
+    css::uno::Reference< css::xml::sax::XFastTokenHandler > mxTokenHandler;
+    const NamespaceMap& mrNamespaceMap;
+    rtl::Reference< sax_fastparser::FastSaxParser > mxParser;
+};
+
+
+} // namespace oox::core
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/oox/core/fasttokenhandler.hxx b/include/oox/core/fasttokenhandler.hxx
new file mode 100644
index 0000000000..396474b988
--- /dev/null
+++ b/include/oox/core/fasttokenhandler.hxx
@@ -0,0 +1,68 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */ + +#ifndef INCLUDED_OOX_CORE_FASTTOKENHANDLER_HXX +#define INCLUDED_OOX_CORE_FASTTOKENHANDLER_HXX + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace oox { class TokenMap; } + +namespace oox::core { + + +/** Wrapper implementing the com.sun.star.xml.sax.XFastTokenHandler API interface + that provides access to the tokens generated from the internal token name list. + */ +class OOX_DLLPUBLIC FastTokenHandler final : + public cppu::ImplInheritanceHelper< sax_fastparser::FastTokenHandlerBase, css::lang::XServiceInfo > +{ +public: + explicit FastTokenHandler(); + virtual ~FastTokenHandler() override; + + // XServiceInfo + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + + // XFastTokenHandler + virtual css::uno::Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken ) override; + virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier ) override; + + // Much faster direct C++ shortcut to the method that matters + virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override; + +private: + const TokenMap& mrTokenMap; ///< Reference to global token map singleton. +}; + + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/filterbase.hxx b/include/oox/core/filterbase.hxx new file mode 100644 index 0000000000..eec0d6a19c --- /dev/null +++ b/include/oox/core/filterbase.hxx @@ -0,0 +1,280 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_FILTERBASE_HXX +#define INCLUDED_OOX_CORE_FILTERBASE_HXX + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace com::sun::star { + namespace beans { struct PropertyValue; } + namespace drawing { class XShape; } + namespace frame { class XFrame; } + namespace frame { class XModel; } + namespace io { class XInputStream; } + namespace io { class XOutputStream; } + namespace io { class XStream; } + namespace lang { class XComponent; } + namespace lang { class XMultiServiceFactory; } + namespace task { class XStatusIndicator; } + namespace uno { class XComponentContext; } +} + +namespace comphelper { + class SequenceAsHashMap; +} +namespace utl { + class MediaDescriptor; +} + +namespace oox { + class GraphicHelper; + class ModelObjectHelper; +} + +namespace oox::ole { + class OleObjectHelper; + class VbaProject; +} + +namespace oox::core { + +enum OoxmlVersion +{ + /** There are currently 5 editions of ECMA-376, latest is from 2021. + * 1st edition allegedly corresponds to Word 2007 + * 2nd edition allegedly corresponds to ISO 29500:2008 + * it's unclear what changed in later editions; there is: + Annex M. 
Differences Between ECMA-376:2016 and ECMA-376:2006 + but that's relative to 1st edition. + */ + ECMA_376_1ST_EDITION, + ISOIEC_29500_2008 +}; + +struct FilterBaseImpl; + +typedef ::cppu::WeakImplHelper< + css::lang::XServiceInfo, + css::lang::XInitialization, + css::document::XImporter, + css::document::XExporter, + css::document::XFilter > + FilterBase_BASE; + +class OOX_DLLPUBLIC FilterBase : public FilterBase_BASE +{ +public: + /// @throws css::uno::RuntimeException + explicit FilterBase( + const css::uno::Reference< css::uno::XComponentContext >& rxContext ); + + virtual ~FilterBase() override; + + /** Returns true, if filter is an import filter. */ + bool isImportFilter() const; + /** Returns true, if filter is an export filter. */ + bool isExportFilter() const; + + OoxmlVersion getVersion() const; + + /** Derived classes implement import of the entire document. */ + virtual bool importDocument() = 0; + + /** Derived classes implement export of the entire document. */ + virtual bool exportDocument() = 0; + + + /** Returns the component context passed in the filter constructor (always existing). */ + const css::uno::Reference< css::uno::XComponentContext >& + getComponentContext() const; + + /** Returns the document model (always existing). */ + const css::uno::Reference< css::frame::XModel >& + getModel() const; + + /** Returns the service factory provided by the document model (always existing). */ + const css::uno::Reference< css::lang::XMultiServiceFactory >& + getModelFactory() const; + + /** Returns the frame that will contain the document model (may be null). */ + const css::uno::Reference< css::frame::XFrame >& + getTargetFrame() const; + + /** Returns the status indicator (may be null). */ + const css::uno::Reference< css::task::XStatusIndicator >& + getStatusIndicator() const; + + /** Returns the FilterData */ + ::comphelper::SequenceAsHashMap& getFilterData() const; + + /** Returns the media descriptor. 
*/ + utl::MediaDescriptor& getMediaDescriptor() const; + + /** Returns the URL of the imported or exported file. */ + const OUString& getFileUrl() const; + + /** Returns an absolute URL for the passed relative or absolute URL. */ + OUString getAbsoluteUrl( const OUString& rUrl ) const; + + /** Returns the base storage of the imported/exported file. */ + StorageRef const & getStorage() const; + + /** Opens and returns the specified input stream from the base storage. + + @param rStreamName + The name of the embedded storage stream. The name may contain + slashes to open streams from embedded substorages. If base stream + access has been enabled in the storage, the base stream can be + accessed by passing an empty string as stream name. + */ + css::uno::Reference< css::io::XInputStream > + openInputStream( const OUString& rStreamName ) const; + + /** Opens and returns the specified output stream from the base storage. + + @param rStreamName + The name of the embedded storage stream. The name may contain + slashes to open streams from embedded substorages. If base stream + access has been enabled in the storage, the base stream can be + accessed by passing an empty string as stream name. + */ + css::uno::Reference< css::io::XOutputStream > + openOutputStream( const OUString& rStreamName ) const; + + /** Commits changes to base storage (and substorages) */ + void commitStorage() const; + + // helpers ---------------------------------------------------------------- + + /** Returns a helper for the handling of graphics and graphic objects. */ + GraphicHelper& getGraphicHelper() const; + + /** Returns a helper with containers for various named drawing objects for + the imported document. */ + ModelObjectHelper& getModelObjectHelper() const; + + ModelObjectHelper& getModelObjectHelperForModel( + const css::uno::Reference& xFactory) const; + + /** Returns a helper for the handling of OLE objects. 
*/ + ::oox::ole::OleObjectHelper& getOleObjectHelper() const; + + /** Returns the VBA project manager. */ + ::oox::ole::VbaProject& getVbaProject() const; + + /** Imports the raw binary data from the specified stream. + @return True, if the data could be imported from the stream. */ + bool importBinaryData( StreamDataSequence & orDataSeq, const OUString& rStreamName ); + + // com.sun.star.lang.XServiceInfo interface ------------------------------- + + virtual sal_Bool SAL_CALL + supportsService( const OUString& rServiceName ) override; + + virtual css::uno::Sequence< OUString > SAL_CALL + getSupportedServiceNames() override; + + // com.sun.star.lang.XInitialization interface ---------------------------- + + /** Receives user defined arguments. + + @param rArgs + the sequence of arguments passed to the filter. The implementation + expects one or two arguments. The first argument shall be the + com.sun.star.lang.XMultiServiceFactory interface of the global + service factory. The optional second argument may contain a + sequence of com.sun.star.beans.NamedValue objects. The different + filter implementations may support different arguments. 
+ */ + virtual void SAL_CALL initialize( + const css::uno::Sequence< css::uno::Any >& rArgs ) override; + + // com.sun.star.document.XImporter interface ------------------------------ + + virtual void SAL_CALL setTargetDocument( + const css::uno::Reference< css::lang::XComponent >& rxDocument ) override; + + // com.sun.star.document.XExporter interface ------------------------------ + + virtual void SAL_CALL setSourceDocument( + const css::uno::Reference< css::lang::XComponent >& rxDocument ) override; + + // com.sun.star.document.XFilter interface -------------------------------- + + virtual sal_Bool SAL_CALL filter( + const css::uno::Sequence< css::beans::PropertyValue >& rMediaDescSeq ) override; + + virtual void SAL_CALL cancel() override; + + bool exportVBA() const; + + bool isExportTemplate() const; + +protected: + virtual css::uno::Reference< css::io::XInputStream > + implGetInputStream( utl::MediaDescriptor& rMediaDesc ) const; + virtual css::uno::Reference< css::io::XStream > + implGetOutputStream( utl::MediaDescriptor& rMediaDesc ) const; + + virtual bool implFinalizeExport( utl::MediaDescriptor& rMediaDescriptor ); + + css::uno::Reference< css::io::XStream > const & + getMainDocumentStream( ) const; + +private: + void setMediaDescriptor( + const css::uno::Sequence< css::beans::PropertyValue >& rMediaDescSeq ); + + /** Derived classes may create a specialized graphic helper, e.g. for + resolving palette colors. */ + virtual GraphicHelper* implCreateGraphicHelper() const; + + /** Derived classes create a VBA project manager object. 
*/ + virtual ::oox::ole::VbaProject* implCreateVbaProject() const = 0; + + virtual StorageRef implCreateStorage( + const css::uno::Reference< css::io::XInputStream >& rxInStream ) const = 0; + virtual StorageRef implCreateStorage( + const css::uno::Reference< css::io::XStream >& rxOutStream ) const = 0; + +private: + std::unique_ptr< FilterBaseImpl > mxImpl; +}; + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/filterdetect.hxx b/include/oox/core/filterdetect.hxx new file mode 100644 index 0000000000..8b01bcc5f2 --- /dev/null +++ b/include/oox/core/filterdetect.hxx @@ -0,0 +1,172 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_OOX_CORE_FILTERDETECT_HXX +#define INCLUDED_OOX_CORE_FILTERDETECT_HXX + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace com::sun::star { + namespace beans { struct PropertyValue; } + namespace io { class XInputStream; } + namespace uno { class XComponentContext; } + namespace xml::sax { class XFastAttributeList; } + namespace xml::sax { class XFastContextHandler; } + namespace xml::sax { class XLocator; } +} + +namespace utl { class MediaDescriptor; } + +namespace oox { class AttributeList; } + +namespace oox::core { + +enum class OOXMLVariant { + ECMA_Transitional, + ISO_Transitional, + ISO_Strict +}; + + +/** Document handler specifically designed for detecting OOXML file formats. + + It takes a reference to the filter string object via its constructor, and + puts the name of the detected filter to it, if it successfully finds one. + */ +class FilterDetectDocHandler final : public ::cppu::WeakImplHelper< css::xml::sax::XFastDocumentHandler > +{ +public: + explicit FilterDetectDocHandler( const css::uno::Reference< css::uno::XComponentContext >& rxContext, OUString& rFilter, OUString aFileName ); + virtual ~FilterDetectDocHandler() override; + + // XFastDocumentHandler + virtual void SAL_CALL startDocument() override; + virtual void SAL_CALL endDocument() override; + virtual void SAL_CALL processingInstruction( const OUString& rTarget, const OUString& rData ) override; + virtual void SAL_CALL setDocumentLocator( const css::uno::Reference< css::xml::sax::XLocator >& xLocator ) override; + + // XFastContextHandler + virtual void SAL_CALL startFastElement( sal_Int32 nElement, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL startUnknownElement( const OUString& Namespace, const OUString& Name, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL 
endFastElement( sal_Int32 Element ) override; + virtual void SAL_CALL endUnknownElement( const OUString& Namespace, const OUString& Name ) override; + virtual css::uno::Reference< XFastContextHandler > SAL_CALL createFastChildContext( sal_Int32 Element, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual css::uno::Reference< XFastContextHandler > SAL_CALL createUnknownChildContext( const OUString& Namespace, const OUString& Name, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL characters( const OUString& aChars ) override; + +private: + void parseRelationship( const AttributeList& rAttribs ); + + OUString getFilterNameFromContentType( std::u16string_view rContentType, std::u16string_view rFileName ); + void parseContentTypesDefault( const AttributeList& rAttribs ); + void parseContentTypesOverride( const AttributeList& rAttribs ); + +private: + typedef ::std::vector< sal_Int32 > ContextVector; + + OUString& mrFilterName; + OUString maFileName; + ContextVector maContextStack; + OUString maTargetPath; + OOXMLVariant maOOXMLVariant; + css::uno::Reference< css::uno::XComponentContext > mxContext; +}; + + +class OOX_DLLPUBLIC FilterDetect final : public ::cppu::WeakImplHelper +{ +public: + /// @throws css::uno::RuntimeException + explicit FilterDetect( const css::uno::Reference< css::uno::XComponentContext >& rxContext ); + virtual ~FilterDetect() override; + + /** Tries to extract an unencrypted ZIP package from the passed media + descriptor. + + First, this function checks if the input stream provided by the media + descriptor property 'InputStream' contains a ZIP package. If yes, this + stream is returned. + + Second, this function checks if the 'ComponentData' property exists and + contains a sequence of com.sun.star.beans.NamedValue. If yes, a named + value is searched with the name 'DecryptedPackage' and a value of type + com.sun.star.io.XStream. 
If the input stream provided by this XStream + contains a ZIP package, this input stream is returned. + + Third, this function checks if the input stream of the media descriptor + contains an OLE package. If yes, it checks the existence of the streams + 'EncryptionInfo' and 'EncryptedPackage' and tries to decrypt the package + into a temporary file. This may include requesting a password from the + media descriptor property 'Password' or from the user, using the + interaction handler provided by the descriptor. On success, and if the + decrypted package is a ZIP package, the XStream of the temporary file + is stored in the property 'ComponentData' of the media descriptor and + its input stream is returned. + */ + css::uno::Reference< css::io::XInputStream > + extractUnencryptedPackage( utl::MediaDescriptor& rMediaDesc ) const; + + // com.sun.star.lang.XServiceInfo interface ------------------------------- + + virtual OUString SAL_CALL getImplementationName() override; + virtual sal_Bool SAL_CALL supportsService( const OUString& rServiceName ) override; + virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override; + + // com.sun.star.document.XExtendedFilterDetection interface --------------- + + /** Detects MS Office 2007 file types and supports package decryption. + + The following file types are detected: + - MS Word 2007 XML Document (*.docx, *.docm) + - MS Word 2007 XML Template (*.dotx, *.dotm) + - MS Excel 2007 XML Document (*.xlsx, *.xlsm) + - MS Excel 2007 BIFF12 Document (*.xlsb) + - MS Excel 2007 XML Template (*.xltx, *.xltm) + - MS Powerpoint 2007 XML Document (*.pptx, *.pptm) + - MS Powerpoint 2007 XML Template (*.potx, *.potm) + + If the package is encrypted, the detection tries to decrypt it into a + temporary file. The user may be asked for a password. The XStream + interface of the temporary file will be stored in the 'ComponentData' + property of the passed media descriptor. 
+ */ + virtual OUString SAL_CALL + detect( css::uno::Sequence< css::beans::PropertyValue >& rMediaDescSeq ) override; + +private: + css::uno::Reference< css::uno::XComponentContext > mxContext; +}; + + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/fragmenthandler.hxx b/include/oox/core/fragmenthandler.hxx new file mode 100644 index 0000000000..b39eae0116 --- /dev/null +++ b/include/oox/core/fragmenthandler.hxx @@ -0,0 +1,139 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_FRAGMENTHANDLER_HXX +#define INCLUDED_OOX_CORE_FRAGMENTHANDLER_HXX + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace com::sun::star { + namespace io { class XInputStream; } + namespace xml::sax { class XFastAttributeList; } + namespace xml::sax { class XFastContextHandler; } + namespace xml::sax { class XLocator; } +} + +namespace oox::core { + +class XmlFilterBase; + +/** Base data of a fragment. 
+ + This data is stored in a separate struct to make it accessible in every + child context handler of the fragment. + */ +struct FragmentBaseData +{ + XmlFilterBase& mrFilter; + const OUString maFragmentPath; + css::uno::Reference< css::xml::sax::XLocator > + mxLocator; + RelationsRef mxRelations; + + explicit FragmentBaseData( + XmlFilterBase& rFilter, + OUString aFragmentPath, + RelationsRef xRelations ); +}; + + +/** Describes record identifiers used to create contexts in a binary stream. + + If a record is used to start a new context, usually the record identifier + increased by 1 is used to mark the end of this context, e.g. the Excel + record SHEETDATA == 0x0091 starts the context, and the record + SHEETDATA_END == 0x0092 ends this context. But some records are used to + start a new context, though there is no identifier to end this context, + e.g. the ROW or EXTROW records. These record identifiers can be marked by + setting the mnEndRecId member of this struct to -1. + */ +struct RecordInfo +{ + sal_Int32 mnStartRecId; ///< Record identifier for context start. + sal_Int32 mnEndRecId; ///< Record identifier for context end, -1 = no record. +}; + + +typedef ::cppu::ImplInheritanceHelper< ContextHandler, css::xml::sax::XFastDocumentHandler > FragmentHandler_BASE; + +class OOX_DLLPUBLIC FragmentHandler : public FragmentHandler_BASE +{ +public: + explicit FragmentHandler( XmlFilterBase& rFilter, const OUString& rFragmentPath ); + virtual ~FragmentHandler() override; + + FragmentHandler(FragmentHandler const &) = default; + FragmentHandler(FragmentHandler &&) = default; + FragmentHandler & operator =(FragmentHandler const &) = delete; // due to ContextHandler + FragmentHandler & operator =(FragmentHandler &&) = delete; // due to ContextHandler + + /** Returns the com.sun.star.xml.sax.XFastContextHandler interface of this context. 
*/ + css::uno::Reference< css::xml::sax::XFastContextHandler > + getFastContextHandler() { return static_cast< ContextHandler* >( this ); } + + // com.sun.star.xml.sax.XFastDocumentHandler interface -------------------- + + virtual void SAL_CALL startDocument() override; + virtual void SAL_CALL endDocument() override; + virtual void SAL_CALL processingInstruction( const OUString& rTarget, const OUString& rData ) override; + virtual void SAL_CALL setDocumentLocator( const css::uno::Reference< css::xml::sax::XLocator >& rxLocator ) override; + + // com.sun.star.xml.sax.XFastContextHandler interface --------------------- + + virtual void SAL_CALL startFastElement( ::sal_Int32 Element, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL startUnknownElement( const OUString& Namespace, const OUString& Name, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL endFastElement( ::sal_Int32 Element ) override; + virtual void SAL_CALL endUnknownElement( const OUString& Namespace, const OUString& Name ) override; + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL createFastChildContext( ::sal_Int32 Element, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL createUnknownChildContext( const OUString& Namespace, const OUString& Name, const css::uno::Reference< css::xml::sax::XFastAttributeList >& Attribs ) override; + virtual void SAL_CALL characters( const OUString& aChars ) override; + + // XML stream handling ---------------------------------------------------- + + /** Opens the fragment stream referred by the own fragment path. Derived + classes may provide specialized stream implementations. 
*/ + virtual css::uno::Reference< css::io::XInputStream > + openFragmentStream() const; + + // binary records --------------------------------------------------------- + + virtual const RecordInfo* getRecordInfos() const; + +protected: + explicit FragmentHandler( XmlFilterBase& rFilter, const OUString& rFragmentPath, RelationsRef xRelations ); +}; + +typedef ::rtl::Reference< FragmentHandler > FragmentHandlerRef; + + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/fragmenthandler2.hxx b/include/oox/core/fragmenthandler2.hxx new file mode 100644 index 0000000000..05b2b2d0be --- /dev/null +++ b/include/oox/core/fragmenthandler2.hxx @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_OOX_CORE_FRAGMENTHANDLER2_HXX +#define INCLUDED_OOX_CORE_FRAGMENTHANDLER2_HXX + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace com::sun::star { + namespace xml::sax { class XFastAttributeList; } + namespace xml::sax { class XFastContextHandler; } +} + +namespace oox { + class AttributeList; + class SequenceInputStream; +} + +namespace oox::core { + +class XmlFilterBase; + +class OOX_DLLPUBLIC FragmentHandler2 : public FragmentHandler, public ContextHandler2Helper +{ +public: + explicit FragmentHandler2( + XmlFilterBase& rFilter, + const OUString& rFragmentPath, + bool bEnableTrimSpace = true ); + virtual ~FragmentHandler2() override; + + FragmentHandler2(FragmentHandler2 const &) = default; + FragmentHandler2(FragmentHandler2 &&) = default; + FragmentHandler2 & operator =(FragmentHandler2 const &) = delete; // due to FragmentHandler + FragmentHandler2 & operator =(FragmentHandler2 &&) = delete; // due to FragmentHandler + + // resolve ambiguity from base classes + virtual void SAL_CALL acquire() noexcept override { FragmentHandler::acquire(); } + virtual void SAL_CALL release() noexcept override { FragmentHandler::release(); } + + // com.sun.star.xml.sax.XFastContextHandler interface --------------------- + + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL + createFastChildContext( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ) final override; + + virtual void SAL_CALL startFastElement( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ) final override; + + virtual void SAL_CALL characters( const OUString& rChars ) final override; + + virtual void SAL_CALL endFastElement( sal_Int32 nElement ) final override; + + // com.sun.star.xml.sax.XFastDocumentHandler interface -------------------- + + virtual void SAL_CALL startDocument() override; + + virtual void SAL_CALL 
endDocument() override; + + // oox.core.ContextHandler interface -------------------------------------- + + virtual ContextHandlerRef createRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ) override; + virtual void startRecord( sal_Int32 nRecId, SequenceInputStream& rStrm ) override; + virtual void endRecord( sal_Int32 nRecId ) override; + + // oox.core.ContextHandler2Helper interface ------------------------------- + + virtual ContextHandlerRef onCreateContext( sal_Int32 nElement, const AttributeList& rAttribs ) override; + virtual void onStartElement( const AttributeList& rAttribs ) override; + virtual void onCharacters( const OUString& rChars ) override; + virtual void onEndElement() override; + + virtual ContextHandlerRef onCreateRecordContext( sal_Int32 nRecId, SequenceInputStream& rStrm ) override; + virtual void onStartRecord( SequenceInputStream& rStrm ) override; + virtual void onEndRecord() override; + + // oox.core.FragmentHandler2 interface ------------------------------------ + + virtual void initializeImport(); + virtual void finalizeImport(); +}; + +typedef ::rtl::Reference< FragmentHandler2 > FragmentHandler2Ref; + + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/recordparser.hxx b/include/oox/core/recordparser.hxx new file mode 100644 index 0000000000..d8525ac8e5 --- /dev/null +++ b/include/oox/core/recordparser.hxx @@ -0,0 +1,88 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_RECORDPARSER_HXX +#define INCLUDED_OOX_CORE_RECORDPARSER_HXX + +#include +#include + +#include +#include +#include +#include +#include + +namespace oox { +namespace core { + +namespace prv { + class ContextStack; + class Locator; +} + + +struct RecordInputSource +{ + BinaryInputStreamRef mxInStream; + OUString maSystemId; +}; + + +class RecordParser final +{ +public: + RecordParser(); + ~RecordParser(); + + void setFragmentHandler( const ::rtl::Reference< FragmentHandler >& rxHandler ); + + /// @throws css::xml::sax::SAXException + /// @throws css::io::IOException + /// @throws css::uno::RuntimeException + void parseStream( const RecordInputSource& rInputSource ); + + const RecordInputSource& getInputSource() const { return maSource; } + +private: + /** Returns a RecordInfo struct that contains the passed record identifier + as context start identifier. */ + const RecordInfo* getStartRecordInfo( sal_Int32 nRecId ) const; + /** Returns a RecordInfo struct that contains the passed record identifier + as context end identifier. 
*/ + const RecordInfo* getEndRecordInfo( sal_Int32 nRecId ) const; + +private: + typedef ::std::map< sal_Int32, RecordInfo > RecordInfoMap; + + RecordInputSource maSource; + ::rtl::Reference< FragmentHandler > mxHandler; + ::rtl::Reference< prv::Locator > mxLocator; + ::std::unique_ptr< prv::ContextStack > mxStack; + RecordInfoMap maStartMap; + RecordInfoMap maEndMap; +}; + + +} // namespace core +} // namespace oox + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/relations.hxx b/include/oox/core/relations.hxx new file mode 100644 index 0000000000..b8a6580648 --- /dev/null +++ b/include/oox/core/relations.hxx @@ -0,0 +1,120 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_RELATIONS_HXX +#define INCLUDED_OOX_CORE_RELATIONS_HXX + +#include +#include +#include +#include + +#include +#include + +namespace oox::core { + + +/** Expands to an OUString containing an 'officeDocument' transitional relation type created + from the passed literal(!) ASCII(!) character array. 
*/ +#define CREATE_OFFICEDOC_RELATION_TYPE( ascii ) \ + ( u"http://schemas.openxmlformats.org/officeDocument/2006/relationships/" ascii ""_ustr ) + +/** Expands to a narrow string literal (not an OUString) containing an 'officeDocument' strict + relation type created from the passed literal(!) ASCII(!) character array. */ +#define CREATE_OFFICEDOC_RELATION_TYPE_STRICT( ascii ) \ + ( "http://purl.oclc.org/ooxml/officeDocument/relationships/" ascii ) + +/** Expands to a UTF-16 string literal (not an OUString) containing an MS Office specific relation + type created from the passed literal(!) ASCII(!) character array. */ +#define CREATE_MSOFFICE_RELATION_TYPE( ascii ) \ + ( u"http://schemas.microsoft.com/office/2006/relationships/" ascii ) + +#define CREATE_XL_CONTENT_TYPE( ascii ) \ + ( "application/vnd.openxmlformats-officedocument.spreadsheetml." ascii "+xml" ) + +struct Relation +{ + OUString maId; + OUString maType; + OUString maTarget; + bool mbExternal; + + Relation() : mbExternal( false ) {} +}; + + +class Relations; +typedef std::shared_ptr< Relations > RelationsRef; + +class OOX_DLLPUBLIC Relations +{ +public: + explicit Relations( OUString aFragmentPath ); + + size_t size() const { return maMap.size(); } + size_t count( const OUString& rId ) const { return maMap.count( rId ); } + ::std::map< OUString, Relation >::const_iterator begin() const + { + return maMap.begin(); + } + ::std::map< OUString, Relation >::const_iterator end() const + { + return maMap.end(); + } + template<typename... Args> + void emplace(Args&&... args) + { + maMap.emplace(std::forward<Args>(args)...); + } + + /** Returns the path of the fragment this relations collection is related to. */ + const OUString& getFragmentPath() const { return maFragmentPath; } + + /** Returns the relation with the passed relation identifier. */ + const Relation* getRelationFromRelId( const OUString& rId ) const; + /** Returns the first relation with the passed type.
*/ + const Relation* getRelationFromFirstType( std::u16string_view rType ) const; + /** Finds all relations associated with the passed type. */ + RelationsRef getRelationsFromTypeFromOfficeDoc( std::u16string_view rType ) const; + + /** Returns the external target of the relation with the passed relation identifier. */ + OUString getExternalTargetFromRelId( const OUString& rRelId ) const; + /** Returns the internal target of the relation with the passed relation identifier. */ + OUString getInternalTargetFromRelId( const OUString& rRelId ) const; + + /** Returns the full fragment path for the target of the passed relation. */ + OUString getFragmentPathFromRelation( const Relation& rRelation ) const; + /** Returns the full fragment path for the passed relation identifier. */ + OUString getFragmentPathFromRelId( const OUString& rRelId ) const; + /** Returns the full fragment path for the first relation of the passed type. */ + OUString getFragmentPathFromFirstType( std::u16string_view rType ) const; + OUString getFragmentPathFromFirstTypeFromOfficeDoc( std::u16string_view rType ) const; + +private: + ::std::map< OUString, Relation > maMap; + OUString maFragmentPath; +}; + + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/relationshandler.hxx b/include/oox/core/relationshandler.hxx new file mode 100644 index 0000000000..ec8f77e92d --- /dev/null +++ b/include/oox/core/relationshandler.hxx @@ -0,0 +1,58 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_OOX_CORE_RELATIONSHANDLER_HXX +#define INCLUDED_OOX_CORE_RELATIONSHANDLER_HXX + +#include +#include +#include +#include + +namespace com::sun::star { + namespace xml::sax { class XFastAttributeList; } + namespace xml::sax { class XFastContextHandler; } +} + +namespace oox::core { + +class XmlFilterBase; + +class RelationsFragment final : public FragmentHandler +{ +public: + explicit RelationsFragment( + XmlFilterBase& rFilter, + const RelationsRef& xRelations ); + + virtual css::uno::Reference< css::xml::sax::XFastContextHandler > SAL_CALL + createFastChildContext( + sal_Int32 nElement, + const css::uno::Reference< css::xml::sax::XFastAttributeList >& rxAttribs ) override; + +private: + RelationsRef mxRelations; +}; + + +} // namespace oox::core + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/include/oox/core/xmlfilterbase.hxx b/include/oox/core/xmlfilterbase.hxx new file mode 100644 index 0000000000..0c6226452d --- /dev/null +++ b/include/oox/core/xmlfilterbase.hxx @@ -0,0 +1,290 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_OOX_CORE_XMLFILTERBASE_HXX
+#define INCLUDED_OOX_CORE_XMLFILTERBASE_HXX
+
+// NOTE(review): this copy of the header was recovered from an HTML rendering
+// that dropped every `<...>` span without inner spaces. The include targets
+// below and the template arguments of rtl::Reference, ShapePairs,
+// NamedShapePairs and of the Reference parameters of importFragment,
+// putPropertiesToDocumentGrabBag, checkDocumentProperties and
+// importCustomFragments were restored from usage and naming in this header -
+// confirm against upstream.
+
+#include <map>
+#include <memory>
+#include <string_view>
+
+#include <com/sun/star/uno/Reference.hxx>
+#include <oox/core/filterbase.hxx>
+#include <oox/core/relations.hxx>
+#include <oox/dllapi.h>
+#include <oox/helper/storagebase.hxx>
+#include <rtl/ustring.hxx>
+#include <sal/types.h>
+
+namespace com::sun::star {
+    namespace document { class XDocumentProperties; }
+    namespace io { class XInputStream; }
+    namespace io { class XOutputStream; }
+    namespace io { class XStream; }
+    namespace text { class XText; }
+    namespace text { class XTextCursor; }
+    namespace text { class XTextField; }
+    namespace uno { class XComponentContext; }
+    namespace xml::dom { class XDocument; }
+    namespace xml::sax { class XFastSAXSerializable; }
+}
+
+namespace oox {
+    namespace drawingml { class Theme; }
+    namespace drawingml::chart { class ChartConverter; }
+    namespace drawingml::table {
+        class TableStyleList;
+        typedef std::shared_ptr< TableStyleList > TableStyleListPtr;
+    }
+    namespace vml { class Drawing; }
+}
+
+namespace rtl { template <class reference_type> class Reference; }
+
+namespace sax_fastparser {
+    class FastSerializerHelper;
+
+    typedef std::shared_ptr< FastSerializerHelper > FSHelperPtr;
+}
+
+namespace utl { class MediaDescriptor; }
+
+namespace oox::drawingml
+{
+class Shape;
+}
+
+namespace oox::core {
+
+class FragmentHandler;
+class FastParser;
+
+struct XmlFilterBaseImpl;
+
+/// Map from a drawingml shape to a UNO shape reference.
+using ShapePairs
+    = std::map<std::shared_ptr<drawingml::Shape>, css::uno::Reference<css::drawing::XShape>>;
+/// ShapePairs collections keyed by a string (see setDiagramFontHeights()).
+using NamedShapePairs = std::map<OUString, ShapePairs>;
+
+/** Filter base class, derived from FilterBase, for fragment-based import
+    and export.
+
+    Declares the theme, VML drawing, chart converter and table style
+    accessors that each concrete filter has to provide, plus helpers to
+    import fragments and their relations, to add relations, to open output
+    fragment streams and to import/export document properties.
+ */
+class OOX_DLLPUBLIC XmlFilterBase : public FilterBase
+{
+public:
+    /// @throws css::uno::RuntimeException
+    explicit XmlFilterBase(
+        const css::uno::Reference< css::uno::XComponentContext >& rxContext );
+
+    virtual ~XmlFilterBase() override;
+
+    /** Has to be implemented by each filter, returns the current theme. */
+    virtual const ::oox::drawingml::Theme* getCurrentTheme() const = 0;
+
+    /** May be implemented by filters which handle Diagrams, default returns empty ptr */
+    virtual std::shared_ptr<::oox::drawingml::Theme> getCurrentThemePtr() const;
+
+    /** Has to be implemented by each filter to return the collection of VML shapes. */
+    virtual ::oox::vml::Drawing* getVmlDrawing() = 0;
+
+    /** Has to be implemented by each filter, returns a filter-specific chart
+        converter object, that should be global per imported document. */
+    virtual ::oox::drawingml::chart::ChartConverter* getChartConverter() = 0;
+
+    /** Helper to switch chart data table - specifically for xlsx imports.
+        The default implementation does nothing. */
+    virtual void useInternalChartDataTable( bool /*bInternal*/ ) { }
+
+    /** Has to be implemented by each filter to return the table style list. */
+    virtual ::oox::drawingml::table::TableStyleListPtr getTableStyles() = 0;
+
+
+    /** NOTE(review): undocumented upstream; judging by the name this returns
+        the path of the first fragment of relation type rPart reachable from
+        the office document part - confirm against the implementation. */
+    OUString getFragmentPathFromFirstTypeFromOfficeDoc( std::u16string_view rPart );
+
+    /** Imports a fragment using the passed fragment handler, which contains
+        the full path to the fragment stream.
+
+        @return  True, if the fragment could be imported.
+     */
+    bool importFragment( const rtl::Reference<FragmentHandler>& rxHandler );
+    bool importFragment( const rtl::Reference<FragmentHandler>& rxHandler, FastParser& rParser );
+
+    /** Imports a fragment into an xml::dom::XDocument.
+
+        @param rFragmentPath path to fragment
+
+        @return a non-empty reference to the XDocument, if the
+        fragment could be imported.
+     */
+    css::uno::Reference< css::xml::dom::XDocument> importFragment( const OUString& rFragmentPath );
+
+    /** Imports a fragment from an xml::dom::XDocument using the
+        passed fragment handler
+
+        @param rxHandler fragment handler; path to fragment is
+        ignored, input source is the rxSerializer
+
+        @param rxSerializer usually retrieved from a
+        xml::dom::XDocument, will get serialized into rxHandler
+
+        @return true, if the fragment could be imported.
+     */
+    bool importFragment( const ::rtl::Reference< FragmentHandler >& rxHandler,
+                         const css::uno::Reference< css::xml::sax::XFastSAXSerializable >& rxSerializer );
+
+    /** Imports the relations fragment associated with the specified fragment.
+
+        @return  The relations collection of the specified fragment.
+     */
+    RelationsRef importRelations( const OUString& rFragmentPath );
+
+    /** Adds new relation.
+
+        @param rType
+            Relation type.
+
+        @param rTarget
+            Relation target.
+
+        @return  Added relation Id.
+     */
+    OUString addRelation( const OUString& rType, std::u16string_view rTarget );
+
+    /** Adds new relation to part's relations.
+
+        @param rOutputStream
+            Output stream of the part the relations are related to.
+            NOTE(review): the original text documented this parameter as
+            `rPartName` ("Part name the relations are related to. The
+            relations will be stored in
+            <rPartName::path>/_rels/<rPartName::name>.rels.") although the
+            signature takes an output stream - confirm the intended wording.
+
+        @param rType
+            Relation type.
+
+        @param rTarget
+            Relation target.
+
+        @param bExternal
+            Defaults to false. NOTE(review): presumably marks the target as
+            external to the package - confirm.
+
+        @return  Added relation Id.
+     */
+    OUString addRelation( const css::uno::Reference< css::io::XOutputStream >& rOutputStream, const OUString& rType, std::u16string_view rTarget, bool bExternal = false );
+
+    /** Opens and returns the specified output stream from the base storage with specified media type.
+
+        @param rStreamName
+            The name of the embedded storage stream. The name may contain
+            slashes to open streams from embedded substorages. If base stream
+            access has been enabled in the storage, the base stream can be
+            accessed by passing an empty string as stream name.
+
+        @param rMediaType
+            The media type string, used in [Content_Types].xml stream in base
+            storage.
+
+        @return The opened output stream.
+     */
+    css::uno::Reference< css::io::XOutputStream >
+    openFragmentStream(
+        const OUString& rStreamName,
+        const OUString& rMediaType );
+
+    /** Opens specified output stream from the base storage with specified
+        media type and returns new fast serializer for that stream.
+
+        @param rStreamName
+            The name of the embedded storage stream. The name may contain
+            slashes to open streams from embedded substorages. If base stream
+            access has been enabled in the storage, the base stream can be
+            accessed by passing an empty string as stream name.
+
+        @param rMediaType
+            The media type string, used in [Content_Types].xml stream in base
+            storage.
+
+        @return newly created serializer helper.
+     */
+    ::sax_fastparser::FSHelperPtr
+    openFragmentStreamWithSerializer(
+        const OUString& rStreamName,
+        const OUString& rMediaType );
+
+    /** Returns new unique ID for exported document.
+
+        Hands out the current value of the ID counter and then advances
+        the counter by one.
+
+        @return newly created ID.
+     */
+    sal_Int32 GetUniqueId() { return mnMaxDocId++; }
+
+    /** Returns the current value of the ID counter without advancing it. */
+    sal_Int32 GetMaxDocId() { return mnMaxDocId; }
+
+    /** Sets the ID counter; the next GetUniqueId() call returns maxDocId. */
+    void SetMaxDocId(sal_Int32 maxDocId) { mnMaxDocId = maxDocId; }
+
+    /** Write the document properties into the current OPC package.
+
+        @param xProperties  The document properties to export.
+
+        @param bSecurityOptOpenReadOnly
+            NOTE(review): undocumented upstream; presumably the document's
+            "open read-only" security option - confirm.
+     */
+    void exportDocumentProperties( const css::uno::Reference< css::document::XDocumentProperties >& xProperties, bool bSecurityOptOpenReadOnly );
+
+    /** Write the customXml entries we are preserving (xlsx and pptx only). */
+    void exportCustomFragments();
+
+    /** Read the document properties and also the customXml entries (xlsx and pptx only). */
+    void importDocumentProperties();
+
+    /** NOTE(review): undocumented upstream; judging by the name this stores
+        rProperties in the grab bag of the document xDstDoc - confirm. */
+    static void putPropertiesToDocumentGrabBag(const css::uno::Reference<css::lang::XComponent>& xDstDoc,
+                                               const comphelper::SequenceAsHashMap& rProperties);
+
+    /** NOTE(review): undocumented upstream; returns a raw FastParser pointer,
+        ownership is not stated in this header - confirm before use. */
+    static FastParser* createParser();
+
+    /// NOTE(review): presumably reports the mbMSO2007 flag - confirm.
+    bool isMSO2007Document() const;
+    /// NOTE(review): presumably reports the mbMSO flag - confirm.
+    bool isMSODocument() const;
+
+    /// Signal that an MSO 2007-created SmartArt was found, need to warn the
+    /// user about it.
+    void setMissingExtDrawing();
+
+    /// Sets the diagram font height collection; takes a raw pointer, so the
+    /// lifetime of the pointee is not expressed by this interface.
+    void setDiagramFontHeights(NamedShapePairs* pDiagramFontHeights);
+    /// Counterpart of setDiagramFontHeights().
+    NamedShapePairs* getDiagramFontHeights();
+
+    void checkDocumentProperties(
+        const css::uno::Reference<css::document::XDocumentProperties>& xDocProps);
+
+    /// NOTE(review): presumably maps the namespace ID nNSID to its URL - confirm.
+    OUString getNamespaceURL(sal_Int32 nNSID) const;
+
+protected:
+    virtual css::uno::Reference< css::io::XInputStream >
+        implGetInputStream( utl::MediaDescriptor& rMediaDesc ) const override;
+
+    virtual css::uno::Reference< css::io::XStream >
+        implGetOutputStream( utl::MediaDescriptor& rMediaDesc ) const override;
+
+    virtual bool implFinalizeExport( utl::MediaDescriptor& rMediaDescriptor ) override;
+
+private:
+    virtual StorageRef implCreateStorage(
+        const css::uno::Reference< css::io::XInputStream >& rxInStream ) const override;
+    virtual StorageRef implCreateStorage(
+        const css::uno::Reference< css::io::XStream >& rxOutStream ) const override;
+
+    void importCustomFragments(css::uno::Reference<css::embed::XStorage> const & xDocumentStorage);
+
+private:
+    ::std::unique_ptr< XmlFilterBaseImpl > mxImpl;   ///< Opaque implementation data (type defined elsewhere).
+    sal_Int32 mnRelId;
+    sal_Int32 mnMaxDocId;                            ///< ID counter behind GetUniqueId()/GetMaxDocId()/SetMaxDocId().
+    bool mbMSO2007;
+    bool mbMSO;
+protected:
+    bool mbMissingExtDrawing;
+};
+
+} // namespace oox::core
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- 
cgit v1.2.3