diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sdext/source/pdfimport/inc | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0. (upstream/4%24.2.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sdext/source/pdfimport/inc')
-rw-r--r-- | sdext/source/pdfimport/inc/contentsink.hxx | 181 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/genericelements.hxx | 319 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/imagecontainer.hxx | 50 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/odfemitter.hxx | 36 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/pdfihelper.hxx | 234 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/pdfiprocessor.hxx | 212 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/pdfparse.hxx | 302 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/saxemitter.hxx | 36 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/treevisiting.hxx | 62 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/treevisitorfactory.hxx | 59 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/wrapper.hxx | 94 | ||||
-rw-r--r-- | sdext/source/pdfimport/inc/xmlemitter.hxx | 53 |
12 files changed, 1638 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/inc/contentsink.hxx b/sdext/source/pdfimport/inc/contentsink.hxx new file mode 100644 index 0000000000..44f609c3ca --- /dev/null +++ b/sdext/source/pdfimport/inc/contentsink.hxx @@ -0,0 +1,181 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_CONTENTSINK_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_CONTENTSINK_HXX
+
+#include <com/sun/star/uno/Reference.hxx>
+#include <com/sun/star/uno/Sequence.hxx>
+#include <com/sun/star/rendering/ARGBColor.hpp>
+#include <rtl/ustring.hxx> // OUString, used by FontAttributes and ContentSink
+#include <memory>
+#include <utility>         // std::move
+
+namespace com::sun::star {
+    namespace rendering
+    {
+        class XPolyPolygon2D;
+    }
+    namespace geometry
+    {
+        struct Matrix2D;
+        struct AffineMatrix2D;
+        struct RealRectangle2D;
+        struct RealPoint2D;
+        struct RealSize2D;
+    }
+    namespace beans
+    {
+        struct PropertyValue;
+    }
+}
+
+namespace pdfi
+{
+    /** Plain value type describing one font.
+
+        Holds the family name, a weight string, three boolean style
+        flags, the size and the ascent. Two instances are equal when
+        all seven members are equal (see operator==).
+     */
+    struct FontAttributes
+    {
+        /// Initializes every member from the arguments, except isOutline,
+        /// which this constructor always sets to false.
+        /// The two strings are taken by value and moved into place.
+        FontAttributes( OUString familyName_,
+                        OUString sFontWeight,
+                        bool isItalic_,
+                        bool isUnderline_,
+                        double size_,
+                        double ascent_) :
+            familyName(std::move(familyName_)),
+            fontWeight(std::move(sFontWeight)),
+            isItalic(isItalic_),
+            isUnderline(isUnderline_),
+            isOutline(false),
+            size(size_),
+            ascent(ascent_)
+        {}
+
+        /// Default font: empty family name, weight "normal", all style
+        /// flags false, size 0.0 and ascent 1.0.
+        FontAttributes() :
+            familyName(),
+            fontWeight(u"normal"_ustr),
+            isItalic(false),
+            isUnderline(false),
+            isOutline(false),
+            size(0.0),
+            ascent(1.0)
+        {}
+
+        OUString familyName;
+        OUString fontWeight;
+        bool isItalic;
+        bool isUnderline;
+        bool isOutline;
+        double size; // device pixel
+        double ascent;
+
+        /// Member-wise comparison of all seven fields.
+        /// The flags are compared as `!a == !b`, i.e. by truth value;
+        /// size and ascent are compared with exact floating-point ==.
+        bool operator==(const FontAttributes& rFont) const
+        {
+            return familyName == rFont.familyName &&
+                fontWeight == rFont.fontWeight &&
+                !isItalic == !rFont.isItalic &&
+                !isUnderline == !rFont.isUnderline &&
+                !isOutline == !rFont.isOutline &&
+                size == rFont.size &&
+                ascent == rFont.ascent;
+        }
+    };
+
+    /** (preliminary) API wrapper around xpdf
+
+        Wraps the functionality currently used from xpdf's OutputDev
+        interface. Subject to change.
+ */
+    struct ContentSink
+    {
+        /// Virtual, so an implementation can be destroyed through a
+        /// ContentSink pointer (e.g. via ContentSinkSharedPtr below).
+        virtual ~ContentSink() {}
+
+        // Every member function below is pure virtual; the interface holds
+        // no data and provides no default behaviour.
+
+        /// Total number of pages for upcoming document
+        virtual void setPageNum( sal_Int32 nNumPages ) = 0;
+        virtual void startPage( const css::geometry::RealSize2D& rSize ) = 0;
+        virtual void endPage() = 0;
+
+        virtual void hyperLink( const css::geometry::RealRectangle2D& rBounds,
+                                const OUString& rURI ) = 0;
+
+        virtual void pushState() = 0;
+        virtual void popState() = 0;
+
+        virtual void setFlatness( double ) = 0;
+        virtual void setTransformation( const css::geometry::AffineMatrix2D& rMatrix ) = 0;
+        virtual void setLineDash( const css::uno::Sequence<double>& dashes,
+                                  double start ) = 0;
+        virtual void setLineJoin( sal_Int8 lineJoin ) = 0;
+        virtual void setLineCap( sal_Int8 lineCap ) = 0;
+        virtual void setMiterLimit(double) = 0;
+        virtual void setLineWidth(double) = 0;
+        virtual void setFillColor( const css::rendering::ARGBColor& rColor ) = 0;
+        virtual void setStrokeColor( const css::rendering::ARGBColor& rColor ) = 0;
+        virtual void setFont( const FontAttributes& rFont ) = 0;
+        virtual void setTextRenderMode( sal_Int32 ) = 0;
+
+
+        // NOTE(review): the "eo" variants below presumably select the
+        // even-odd fill rule -- confirm against the implementation.
+        virtual void strokePath( const css::uno::Reference<
+                                     css::rendering::XPolyPolygon2D >& rPath ) = 0;
+        virtual void fillPath( const css::uno::Reference<
+                                   css::rendering::XPolyPolygon2D >& rPath ) = 0;
+        virtual void eoFillPath( const css::uno::Reference<
+                                     css::rendering::XPolyPolygon2D >& rPath ) = 0;
+
+        virtual void intersectClip(const css::uno::Reference<
+                                       css::rendering::XPolyPolygon2D >& rPath) = 0;
+        virtual void intersectEoClip(const css::uno::Reference<
+                                         css::rendering::XPolyPolygon2D >& rPath) = 0;
+
+        virtual void drawGlyphs( const OUString& rGlyphs,
+                                 const css::geometry::RealRectangle2D& rRect,
+                                 const css::geometry::Matrix2D& rFontMatrix,
+                                 double fontSize) = 0;
+
+        /// issued when a sequence of associated glyphs is drawn
+        virtual void endText() = 0;
+
+        /// draws given bitmap as a mask (using current fill color)
+        virtual void drawMask(const css::uno::Sequence<
+                                  css::beans::PropertyValue>& xBitmap,
+                              bool bInvert ) = 0;
+        /// Given image must already be color-mapped and normalized to sRGB.
+        virtual void drawImage(const css::uno::Sequence<
+                                   css::beans::PropertyValue>& xBitmap ) = 0;
+        /** Given image must already be color-mapped and normalized to sRGB.
+
+            maskColors must contain two sequences of color components
+         */
+        virtual void drawColorMaskedImage(const css::uno::Sequence<
+                                              css::beans::PropertyValue>& xBitmap,
+                                          const css::uno::Sequence<
+                                              css::uno::Any>& xMaskColors ) = 0;
+        virtual void drawMaskedImage(const css::uno::Sequence<
+                                         css::beans::PropertyValue>& xBitmap,
+                                     const css::uno::Sequence<
+                                         css::beans::PropertyValue>& xMask,
+                                     bool bInvertMask) = 0;
+        virtual void drawAlphaMaskedImage(const css::uno::Sequence<
+                                              css::beans::PropertyValue>& xImage,
+                                          const css::uno::Sequence<
+                                              css::beans::PropertyValue>& xMask) = 0;
+    };
+
+    /// Shared-ownership handle to a ContentSink implementation.
+    typedef std::shared_ptr<ContentSink> ContentSinkSharedPtr;
+}
+
+#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_CONTENTSINK_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/genericelements.hxx b/sdext/source/pdfimport/inc/genericelements.hxx
new file mode 100644
index 0000000000..6d1459a1f0
--- /dev/null
+++ b/sdext/source/pdfimport/inc/genericelements.hxx
@@ -0,0 +1,319 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership.
The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_GENERICELEMENTS_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_GENERICELEMENTS_HXX
+
+#include "pdfihelper.hxx"
+#include "treevisiting.hxx"
+
+#include <com/sun/star/task/XStatusIndicator.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/i18n/BreakIterator.hpp>
+#include <basegfx/polygon/b2dpolypolygon.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
+
+#include <list>
+#include <memory> // std::unique_ptr, std::shared_ptr, std::make_shared
+
+namespace pdfi
+{
+    class XmlEmitter;
+    class StyleContainer;
+    class ImageContainer;
+    class PDFIProcessor;
+    class ElementFactory;
+
+
+    /** Bundle of collaborators passed around while emitting.
+
+        The emitter, style container, image container and processor are
+        stored as references, so the referenced objects must outlive the
+        EmitContext. The status indicator and component context are
+        stored as UNO references (copied from the arguments).
+     */
+    struct EmitContext
+    {
+        EmitContext(
+            XmlEmitter&                              _rEmitter,
+            StyleContainer&                          _rStyles,
+            ImageContainer&                          _rImages,
+            PDFIProcessor&                           _rProcessor,
+            const css::uno::Reference<
+            css::task::XStatusIndicator>& _xStatusIndicator,
+            css::uno::Reference< css::uno::XComponentContext > const & xContext)
+        :
+            rEmitter(_rEmitter),
+            rStyles(_rStyles),
+            rImages(_rImages),
+            rProcessor(_rProcessor),
+            xStatusIndicator(_xStatusIndicator),
+            m_xContext(xContext)
+        {}
+
+        XmlEmitter&     rEmitter;
+        StyleContainer& rStyles;
+        ImageContainer& rImages;
+        PDFIProcessor&  rProcessor;
+        css::uno::Reference<
+            css::task::XStatusIndicator> xStatusIndicator;
+        css::uno::Reference<
+            css::uno::XComponentContext > m_xContext;
+    };
+
+    /** Base class of all nodes in the element tree.
+
+        Each node has a geometry (x, y, w, h), a style id, a non-owning
+        pointer to its parent and an owning list of its children.
+     */
+    struct Element
+    {
+    protected:
+        /// Zero-initializes the geometry and sets StyleId to -1.
+        /// If pParent is non-null, `this` is appended to
+        /// pParent->Children, a list of std::unique_ptr. The parent
+        /// thereby takes ownership, so an element constructed with a
+        /// parent has to be heap-allocated and must not be owned by
+        /// anything else.
+        explicit Element( Element* pParent )
+            : x( 0 ), y( 0 ), w( 0 ), h( 0 ), StyleId( -1 ), Parent( pParent )
+        {
+            if( pParent )
+                pParent->Children.emplace_back( this );
+        }
+
+    public:
+        virtual ~Element();
+
+        /**
+            To be implemented by every tree node that needs to be
+            visitable.
+         */
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) = 0;
+        /// Apply visitor to all children
+        void applyToChildren( ElementTreeVisitor& );
+        /// Union element geometry with given element
+        void updateGeometryWith( const Element* pMergeFrom );
+
+        /// To avoid some dynamic_cast cost
+        /// (the base returns nullptr; TextElement overrides both
+        /// overloads to return `this`)
+        virtual const TextElement* dynCastAsTextElement() const { return nullptr; }
+        virtual TextElement* dynCastAsTextElement() { return nullptr; }
+
+#if OSL_DEBUG_LEVEL > 0
+        // xxx refact TODO: move code to visitor
+        virtual void emitStructure( int nLevel );
+#endif
+        /** el must be a valid dereferenceable iterator of el->Parent->Children
+            pNewParent must not be NULL
+        */
+        static void setParent( std::list<std::unique_ptr<Element>>::iterator const & el, Element* pNewParent );
+
+        double              x, y, w, h;
+        sal_Int32           StyleId;
+        Element*            Parent;   // non-owning back pointer
+        std::list<std::unique_ptr<Element>> Children; // owns the child nodes
+    };
+
+    /// Parentless container node (constructed with a null parent).
+    struct ListElement final : public Element
+    {
+        ListElement() : Element( nullptr ) {}
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+    };
+
+    /// Node that carries a URI string.
+    struct HyperlinkElement final : public Element
+    {
+        friend class ElementFactory;
+        HyperlinkElement( Element* pParent, const OUString& rURI )
+        : Element( pParent ), URI( rURI ) {}
+    public:
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+
+        OUString URI;
+    };
+
+    /// Intermediate base for nodes that refer to a graphics context by
+    /// id (GCId). Not instantiable directly: the constructor is protected.
+    struct GraphicalElement : public Element
+    {
+    protected:
+        /// Stores nGCId; both flags start false, FontSize 0.0 and
+        /// TextStyleId 0.
+        GraphicalElement(Element* pParent, sal_Int32 nGCId)
+            : Element(pParent)
+            , GCId(nGCId)
+            , MirrorVertical(false)
+            , IsForText(false)
+            , FontSize(0.0)
+            , TextStyleId(0)
+        {
+        }
+
+    public:
+        sal_Int32 GCId;
+        bool      MirrorVertical;
+        bool      IsForText;
+        double    FontSize;
+        sal_Int32 TextStyleId;
+    };
+
+    /// Intermediate base adding isCharacter (initially false) and
+    /// ZOrder (initially 0). Constructor is protected.
+    struct DrawElement : public GraphicalElement
+    {
+    protected:
+        DrawElement( Element* pParent, sal_Int32 nGCId )
+        : GraphicalElement( pParent, nGCId ), isCharacter(false), ZOrder(0) {}
+
+    public:
+        bool      isCharacter;
+        sal_Int32 ZOrder;
+    };
+
+    struct FrameElement final : public DrawElement
+    {
+        friend class ElementFactory;
+        FrameElement( Element* pParent, sal_Int32 nGCId )
+        : DrawElement( pParent, nGCId ) {}
+
+    public:
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+    };
+
+    /// Node holding a text buffer and the id of the font it uses.
+    struct TextElement final : public GraphicalElement
+    {
+        friend class ElementFactory;
+        TextElement( Element* pParent, sal_Int32 nGCId, sal_Int32 nFontId )
+        : GraphicalElement( pParent, nGCId ), FontId( nFontId ) {}
+
+    public:
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+
+        virtual const TextElement* dynCastAsTextElement() const override { return this; }
+        virtual TextElement* dynCastAsTextElement() override { return this; }
+
+        OUStringBuffer Text;
+        sal_Int32      FontId;
+    };
+
+    /// Paragraph node; starts as type Normal with bRtl == false.
+    struct ParagraphElement final : public Element
+    {
+        friend class ElementFactory;
+        explicit ParagraphElement( Element* pParent ) : Element( pParent ), Type( Normal ), bRtl( false ) {}
+
+    public:
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) override;
+
+        // returns true only if only a single line is contained
+        bool isSingleLined( PDFIProcessor const & rProc ) const;
+        // returns the highest line height of the contained textelements
+        // line height is font height if the text element is itself multilined
+        double getLineHeight( PDFIProcessor& rProc ) const;
+        // returns the first text element child; does not recurse through subparagraphs
+        TextElement* getFirstTextChild() const;
+
+        enum ParagraphType { Normal, Headline };
+        ParagraphType       Type;
+        bool                bRtl;
+    };
+
+    /// Draw node holding a poly-polygon and an action value.
+    /// NOTE(review): Action presumably holds PolygonAction bits from
+    /// pdfihelper.hxx -- confirm against the constructor's definition.
+    struct PolyPolyElement final : public DrawElement
+    {
+        friend class ElementFactory;
+        PolyPolyElement( Element* pParent, sal_Int32 nGCId,
+                         const basegfx::B2DPolyPolygon& rPolyPoly,
+                         sal_Int8 nAction );
+    public:
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) override;
+
+        void updateGeometry();
+
+#if OSL_DEBUG_LEVEL > 0
+        virtual void emitStructure( int nLevel ) override;
+#endif
+
+        basegfx::B2DPolyPolygon PolyPoly;
+        sal_Int8                Action;
+    };
+
+    /// Draw node referring to an image by its ImageId.
+    struct ImageElement final : public DrawElement
+    {
+        friend class ElementFactory;
+        ImageElement( Element* pParent, sal_Int32 nGCId, ImageId nImage )
+        : DrawElement( pParent, nGCId ), Image( nImage ) {}
+
+    public:
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+
+        ImageId Image;
+    };
+
+    /// Page node: page number, four margins (all initially 0.0), a
+    /// list of pending hyperlinks and optional header/footer elements.
+    struct PageElement final : public Element
+    {
+        friend class ElementFactory;
+        PageElement( Element* pParent, sal_Int32 nPageNr )
+        : Element( pParent ), PageNumber( nPageNr ), Hyperlinks(),
+        TopMargin( 0.0 ), BottomMargin( 0.0 ), LeftMargin( 0.0 ), RightMargin( 0.0 )
+        {}
+    private:
+        // helper method for resolveHyperlinks
+        bool resolveHyperlink( const std::list<std::unique_ptr<Element>>::iterator& link_it, std::list<std::unique_ptr<Element>>& rElements );
+    public:
+        virtual ~PageElement() override;
+
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) override;
+
+        void resolveHyperlinks();
+        void resolveFontStyles( PDFIProcessor const & rProc );
+        void resolveUnderlines( PDFIProcessor const & rProc );
+
+        sal_Int32      PageNumber;
+        ListElement    Hyperlinks; // contains not yet realized links on this page
+        double         TopMargin;
+        double         BottomMargin;
+        double         LeftMargin;
+        double         RightMargin;
+        std::unique_ptr<Element> HeaderElement;
+        std::unique_ptr<Element> FooterElement;
+    };
+
+    /// Parentless root node (constructed with a null parent).
+    struct DocumentElement final : public Element
+    {
+        friend class ElementFactory;
+    public:
+        DocumentElement() : Element( nullptr ) {}
+        virtual ~DocumentElement() override;
+
+        virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+    };
+
+    // this class is the differentiator of document types: it will create
+    // Element objects with an optimize() method suitable for the document type
+    //
+    // Ownership: every create*Element() except createDocumentElement()
+    // returns a raw pointer obtained from `new`. When pParent is non-null
+    // the Element constructor has already stored the object in
+    // pParent->Children (a list of std::unique_ptr), so the parent owns it
+    // and the caller must not delete it. With a null pParent nothing in
+    // this header takes ownership.
+    class ElementFactory
+    {
+    public:
+        ElementFactory() = delete;
+
+        static HyperlinkElement* createHyperlinkElement( Element* pParent, const OUString& rURI )
+        { return new HyperlinkElement( pParent, rURI ); }
+
+        static TextElement* createTextElement( Element* pParent, sal_Int32 nGCId, sal_Int32 nFontId )
+        { return new TextElement( pParent, nGCId, nFontId ); }
+        static ParagraphElement* createParagraphElement( Element* pParent )
+        { return new ParagraphElement( pParent ); }
+
+        static FrameElement* createFrameElement( Element* pParent, sal_Int32 nGCId )
+        { return new FrameElement( pParent, nGCId ); }
+        static PolyPolyElement*
+            createPolyPolyElement( Element* pParent,
+                                   sal_Int32 nGCId,
+                                   const basegfx::B2DPolyPolygon& rPolyPoly,
+                                   sal_Int8 nAction)
+        { return new PolyPolyElement( pParent, nGCId, rPolyPoly, nAction ); }
+        static ImageElement* createImageElement( Element* pParent, sal_Int32 nGCId, ImageId nImage )
+        { return new ImageElement( pParent, nGCId, nImage ); }
+
+        static PageElement* createPageElement( Element* pParent,
+                                               sal_Int32 nPageNr )
+        { return new PageElement( pParent, nPageNr ); }
+        /// The document root is the only element handed out with
+        /// shared ownership.
+        static std::shared_ptr<DocumentElement> createDocumentElement()
+        { return std::make_shared<DocumentElement>(); }
+    };
+
+    bool isComplex(const css::uno::Reference<css::i18n::XBreakIterator>& rBreakIterator, TextElement* const pTextElem);
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/imagecontainer.hxx b/sdext/source/pdfimport/inc/imagecontainer.hxx
new file mode 100644
index 0000000000..d21ed7504f
--- /dev/null
+++ b/sdext/source/pdfimport/inc/imagecontainer.hxx
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil;
c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_IMAGECONTAINER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_IMAGECONTAINER_HXX
+
+#include "pdfihelper.hxx"
+
+#include <com/sun/star/beans/PropertyValue.hpp>
+
+#include <vector>
+
+namespace pdfi
+{
+    struct EmitContext;
+
+    /// Stores images, each represented as a sequence of PropertyValue,
+    /// in a vector and hands out ImageId values for them.
+    class ImageContainer
+    {
+    private:
+        std::vector< css::uno::Sequence<
+            css::beans::PropertyValue> > m_aImages;
+
+    public:
+        ImageContainer();
+
+        /// Adds an image and returns the ImageId to refer to it later.
+        /// NOTE(review): the id is presumably the index into m_aImages --
+        /// confirm against the definition.
+        ImageId addImage( const css::uno::Sequence<
+                          css::beans::PropertyValue>& xBitmap );
+        /// Defined out of line; takes an id as returned by addImage()
+        /// and the emit context to write through.
+        void writeBase64EncodedStream( ImageId nImageId, EmitContext& rContext );
+    };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/odfemitter.hxx b/sdext/source/pdfimport/inc/odfemitter.hxx
new file mode 100644
index 0000000000..97111c2991
--- /dev/null
+++ b/sdext/source/pdfimport/inc/odfemitter.hxx
@@ -0,0 +1,36 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_ODFEMITTER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_ODFEMITTER_HXX
+
+#include "xmlemitter.hxx"
+#include <com/sun/star/uno/Reference.hxx>
+
+namespace com::sun::star::io { class XOutputStream; }
+
+namespace pdfi
+{
+    /// Factory for an XmlEmitter bound to the given output stream.
+    /// Only the declaration lives here; XmlEmitterSharedPtr comes from
+    /// xmlemitter.hxx.
+    XmlEmitterSharedPtr createOdfEmitter( const css::uno::Reference<
+                                          css::io::XOutputStream>& xOut );
+}
+
+#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_ODFEMITTER_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/pdfihelper.hxx b/sdext/source/pdfimport/inc/pdfihelper.hxx
new file mode 100644
index 0000000000..c6be1a6ba7
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfihelper.hxx
@@ -0,0 +1,234 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIHELPER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIHELPER_HXX
+
+#include "contentsink.hxx"
+
+#include <rtl/ustring.hxx>
+#include <rtl/math.h>
+#include <basegfx/matrix/b2dhommatrix.hxx>
+#include <basegfx/polygon/b2dpolypolygon.hxx>
+#include <basegfx/polygon/b2dpolygon.hxx>
+#include <com/sun/star/rendering/PathCapType.hpp>
+#include <com/sun/star/rendering/PathJoinType.hpp>
+
+#include <cmath> // std::floor, used by convPx2mmPrec2
+#include <unordered_map>
+#include <vector>
+#include <o3tl/hash_combine.hxx>
+
+// virtual resolution of the PDF OutputDev in dpi
+#define PDFI_OUTDEV_RESOLUTION 7200
+
+namespace com::sun::star::task { class XInteractionHandler; }
+
+namespace pdfi
+{
+    typedef std::unordered_map< OUString, OUString > PropertyMap;
+    typedef sal_Int32 ImageId;
+
+    /// What to do with a polygon. values can be ORed together
+    enum PolygonAction { PATH_STROKE=1, PATH_FILL=2, PATH_EOFILL=4 };
+
+    OUString unitMMString( double fMM );
+    OUString convertPixelToUnitString( double fPix );
+
+    /// Convert device pixels (at PDFI_OUTDEV_RESOLUTION dpi) to mm,
+    /// i.e. multiply by 25.4 / PDFI_OUTDEV_RESOLUTION.
+    inline double convPx2mm( double fPix )
+    {
+        const double px2mm = 25.4/PDFI_OUTDEV_RESOLUTION;
+        fPix *= px2mm;
+        return fPix;
+    }
+
+    /// Inverse of convPx2mm: multiply by PDFI_OUTDEV_RESOLUTION / 25.4.
+    inline double convmm2Px( double fMM )
+    {
+        const double mm2px = PDFI_OUTDEV_RESOLUTION/25.4;
+        fMM *= mm2px;
+        return fMM;
+    }
+
+    /// Convert device pixels to mm, limited to 2 decimal places.
+    /// The value is cut with std::floor, i.e. rounded towards negative
+    /// infinity, not to the nearest hundredth (1.239 mm yields 1.23).
+    inline double convPx2mmPrec2( double fPix )
+    {
+        constexpr double px2mm = 25.4/PDFI_OUTDEV_RESOLUTION;
+        double mm = fPix * ( px2mm * 100);
+        return std::floor(mm) / 100;
+    }
+
+    /// Convert color to "#FEFEFE" color notation
+    OUString getColorString( const css::rendering::ARGBColor& );
+    OUString getPercentString(double value);
+
+    double GetAverageTransformationScale(const basegfx::B2DHomMatrix& matrix);
+    void FillDashStyleProps(PropertyMap& props, const std::vector<double>& dashArray, double scale);
+
+    /// Hash functor for FontAttributes.
+    /// Combines familyName, fontWeight, the three style flags and size.
+    /// `ascent` is not hashed although FontAttributes::operator==
+    /// compares it; that is still consistent, because equal objects
+    /// produce equal hashes.
+    struct FontAttrHash
+    {
+        size_t operator()(const FontAttributes& rFont ) const
+        {
+            std::size_t seed = 0;
+            o3tl::hash_combine(seed, rFont.familyName.hashCode());
+            o3tl::hash_combine(seed, rFont.fontWeight);
+            o3tl::hash_combine(seed, rFont.isItalic);
+            o3tl::hash_combine(seed, rFont.isUnderline);
+            o3tl::hash_combine(seed, rFont.isOutline);
+            o3tl::hash_combine(seed, rFont.size);
+            return seed;
+        }
+    };
+
+    /// Snapshot of the drawing state: colours, line attributes, dash
+    /// array, font id, text render mode, transformation and clip.
+    struct GraphicsContext
+    {
+        css::rendering::ARGBColor     LineColor;
+        css::rendering::ARGBColor     FillColor;
+        sal_Int8                      LineJoin;
+        sal_Int8                      LineCap;
+        sal_Int8                      BlendMode;
+        double                        Flatness;
+        double                        LineWidth;
+        double                        MiterLimit;
+        std::vector<double>           DashArray;
+        sal_Int32                     FontId;
+        sal_Int32                     TextRenderMode;
+        basegfx::B2DHomMatrix         Transformation;
+        basegfx::B2DPolyPolygon       Clip;
+
+        /// Defaults: value-initialized colours, all integral members 0,
+        /// Flatness 0.0, LineWidth 1.0, MiterLimit 10.0, empty dash
+        /// array, default-constructed transformation and clip.
+        GraphicsContext() :
+            LineColor(),
+            FillColor(),
+            LineJoin(0),
+            LineCap(0),
+            BlendMode(0),
+            Flatness(0.0),
+            LineWidth(1.0),
+            MiterLimit(10.0),
+            DashArray(),
+            FontId(0),
+            TextRenderMode(0),
+            Transformation(),
+            Clip()
+        {}
+
+        /// Full member-wise comparison (colours per channel); floating
+        /// point members are compared with exact ==.
+        bool operator==(const GraphicsContext& rRight ) const
+        {
+            return LineColor.Red == rRight.LineColor.Red &&
+                LineColor.Green == rRight.LineColor.Green &&
+                LineColor.Blue == rRight.LineColor.Blue &&
+                LineColor.Alpha == rRight.LineColor.Alpha &&
+                FillColor.Red == rRight.FillColor.Red &&
+                FillColor.Green == rRight.FillColor.Green &&
+                FillColor.Blue == rRight.FillColor.Blue &&
+                FillColor.Alpha == rRight.FillColor.Alpha &&
+                LineJoin  == rRight.LineJoin &&
+                LineCap   == rRight.LineCap &&
+                BlendMode == rRight.BlendMode &&
+                LineWidth == rRight.LineWidth &&
+                Flatness == rRight.Flatness &&
+                MiterLimit == rRight.MiterLimit &&
+                DashArray == rRight.DashArray &&
+                FontId    == rRight.FontId &&
+                TextRenderMode == rRight.TextRenderMode &&
+                Transformation == rRight.Transformation &&
+                Clip == rRight.Clip;
+        }
+
+        /// Name of the line join; any value other than ROUND or BEVEL
+        /// falls into the MITER branch and yields "miter".
+        OUString GetLineJoinString() const
+        {
+            switch (LineJoin)
+            {
+            default:
+            case css::rendering::PathJoinType::MITER:
+                return "miter";
+            case css::rendering::PathJoinType::ROUND:
+                return "round";
+            case css::rendering::PathJoinType::BEVEL:
+                return "bevel";
+            }
+        }
+
+        /// Name of the line cap; any value other than ROUND or SQUARE
+        /// falls into the BUTT branch and yields "butt".
+        OUString GetLineCapString() const
+        {
+            switch (LineCap)
+            {
+            default:
+            case css::rendering::PathCapType::BUTT:
+                return "butt";
+            case css::rendering::PathCapType::ROUND:
+                return "round";
+            case css::rendering::PathCapType::SQUARE:
+                return "square";
+            }
+        }
+
+        /// True when either off-diagonal element (0,1) or (1,0) of
+        /// Transformation is non-zero.
+        bool isRotatedOrSkewed() const
+        { return Transformation.get( 0, 1 ) != 0.0 ||
+                Transformation.get( 1, 0 ) != 0.0; }
+    };
+
+    /// Hash functor for GraphicsContext.
+    /// Coarser than operator==: of DashArray only the size is hashed,
+    /// and of Clip only the point count of its first polygon (0 when
+    /// the clip is empty). Equal contexts still hash equal.
+    struct GraphicsContextHash
+    {
+        size_t operator()(const GraphicsContext& rGC ) const
+        {
+            std::size_t seed = 0;
+            o3tl::hash_combine(seed, rGC.LineColor.Red);
+            o3tl::hash_combine(seed, rGC.LineColor.Green);
+            o3tl::hash_combine(seed, rGC.LineColor.Blue);
+            o3tl::hash_combine(seed, rGC.LineColor.Alpha);
+            o3tl::hash_combine(seed, rGC.FillColor.Red);
+            o3tl::hash_combine(seed, rGC.FillColor.Green);
+            o3tl::hash_combine(seed, rGC.FillColor.Blue);
+            o3tl::hash_combine(seed, rGC.FillColor.Alpha);
+            o3tl::hash_combine(seed, rGC.LineJoin);
+            o3tl::hash_combine(seed, rGC.LineCap);
+            o3tl::hash_combine(seed, rGC.BlendMode);
+            o3tl::hash_combine(seed, rGC.LineWidth);
+            o3tl::hash_combine(seed, rGC.Flatness);
+            o3tl::hash_combine(seed, rGC.MiterLimit);
+            o3tl::hash_combine(seed, rGC.DashArray.size());
+            o3tl::hash_combine(seed, rGC.FontId);
+            o3tl::hash_combine(seed, rGC.TextRenderMode);
+            o3tl::hash_combine(seed, rGC.Transformation.get( 0, 0 ));
+            o3tl::hash_combine(seed, rGC.Transformation.get( 1, 0 ));
+            o3tl::hash_combine(seed, rGC.Transformation.get( 0, 1 ));
+            o3tl::hash_combine(seed, rGC.Transformation.get( 1, 1 ));
+            o3tl::hash_combine(seed, rGC.Transformation.get( 0, 2 ));
+            o3tl::hash_combine(seed, rGC.Transformation.get( 1, 2 ));
+            o3tl::hash_combine(seed, rGC.Clip.count() ? rGC.Clip.getB2DPolygon(0).count() : 0);
+            return seed;
+        }
+    };
+
+    /** retrieve password from user
+     */
+    bool getPassword( const css::uno::Reference<
+                            css::task::XInteractionHandler >& xHandler,
+                      OUString&                               rOutPwd,
+                      bool                                    bFirstTry,
+                      const OUString&                         rDocName
+                      );
+
+    void reportUnsupportedEncryptionFormat(
+        css::uno::Reference<
+            css::task::XInteractionHandler > const & handler);
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/pdfiprocessor.hxx b/sdext/source/pdfimport/inc/pdfiprocessor.hxx
new file mode 100644
index 0000000000..3fdc146716
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfiprocessor.hxx
@@ -0,0 +1,212 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIPROCESSOR_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIPROCESSOR_HXX
+
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/task/XStatusIndicator.hpp>
+#include <com/sun/star/geometry/RealSize2D.hpp>
+#include <com/sun/star/geometry/RealRectangle2D.hpp>
+#include <com/sun/star/geometry/Matrix2D.hpp>
+
+#include <basegfx/matrix/b2dhommatrix.hxx>
+
+#include <rtl/ustring.hxx>
+
+#include <memory>
+#include <unordered_map>
+
+#include "imagecontainer.hxx"
+#include "contentsink.hxx"
+#include "treevisitorfactory.hxx"
+#include "genericelements.hxx"
+
+namespace pdfi
+{
+
+    class  PDFIProcessor;
+    struct Element;
+    struct DocumentElement;
+    struct PageElement;
+    class  ElementFactory;
+    class  XmlEmitter;
+    class  CharGlyph;
+
+    /** Main entry from the parser
+
+        Creates the internal DOM tree from the render calls
+     */
+    class PDFIProcessor final : public ContentSink
+    {
+    public:
+        // Public data members, accessible to any code holding a processor.
+        css::uno::Reference<
+            css::uno::XComponentContext >  m_xContext;
+        basegfx::B2DHomMatrix prevTextMatrix;
+        double prevCharWidth;
+
+        explicit PDFIProcessor( const css::uno::Reference< css::task::XStatusIndicator >& xStat,
+            css::uno::Reference< css::uno::XComponentContext > const & xContext) ;
+
+        void emit( XmlEmitter&               rEmitter,
+                   const TreeVisitorFactory& rVisitorFactory );
+
+        sal_Int32 getGCId( const GraphicsContext& rGC );
+        const GraphicsContext& getGraphicsContext( sal_Int32 nGCId ) const;
+        /// Top of the graphics context stack (m_aGCStack.back()).
+        /// The stack must not be empty when this is called.
+        GraphicsContext& getCurrentContext() { return m_aGCStack.back(); }
+        const GraphicsContext& getCurrentContext() const { return m_aGCStack.back(); }
+
+        const css::uno::Reference< css::task::XStatusIndicator >& getStatusIndicator() const
+        { return m_xStatusIndicator; }
+
+        const FontAttributes& getFont( sal_Int32 nFontId ) const;
+        sal_Int32 getFontId( const FontAttributes& rAttr ) const;
+
+        static void sortElements( Element* pElement );
+
+        static OUString SubstituteBidiMirrored(const OUString& rString);
+
+    private:
+        void processGlyphLine();
+
+        // ContentSink interface implementation
+        // (declared private here: reachable through a ContentSink
+        // reference or pointer, not through PDFIProcessor directly)
+
+        virtual void setPageNum( sal_Int32 nNumPages ) override;
+        virtual void startPage( const css::geometry::RealSize2D& rSize ) override;
+        virtual void endPage() override;
+
+        virtual void hyperLink( const css::geometry::RealRectangle2D& rBounds,
+                                const OUString&                       rURI ) override;
+        virtual void pushState() override;
+        virtual void popState() override;
+        virtual void setFlatness( double ) override;
+        virtual void setTransformation( const css::geometry::AffineMatrix2D& rMatrix ) override;
+        virtual void setLineDash( const css::uno::Sequence<double>& dashes,
+                                  double                            start ) override;
+        virtual void setLineJoin(sal_Int8) override;
+        virtual void setLineCap(sal_Int8) override;
+        virtual void setMiterLimit(double) override;
+        virtual void setLineWidth(double) override;
+        virtual void setFillColor( const css::rendering::ARGBColor& rColor ) override;
+        virtual void setStrokeColor( const css::rendering::ARGBColor& rColor ) override;
+        virtual void setFont( const FontAttributes& rFont ) override;
+        virtual void setTextRenderMode( sal_Int32 ) override;
+
+        virtual void strokePath( const css::uno::Reference<
+                                       css::rendering::XPolyPolygon2D >& rPath ) override;
+        virtual void fillPath( const css::uno::Reference<
+                                     css::rendering::XPolyPolygon2D >& rPath ) override;
+        virtual void eoFillPath( const css::uno::Reference<
+                                       css::rendering::XPolyPolygon2D >& rPath ) override;
+
+        virtual void intersectClip(const css::uno::Reference<
+                                         css::rendering::XPolyPolygon2D >& rPath) override;
+        virtual void intersectEoClip(const css::uno::Reference<
+                                           css::rendering::XPolyPolygon2D >& rPath) override;
+
+        virtual void drawGlyphs( const OUString&                       rGlyphs,
+                                 const css::geometry::RealRectangle2D& rRect,
+                                 const css::geometry::Matrix2D&        rFontMatrix,
+                                 double fontSize) override;
+        virtual void endText() override;
+
+        virtual void drawMask(const css::uno::Sequence<
+                                    css::beans::PropertyValue>& xBitmap,
+                              bool                              bInvert ) override;
+        /// Given image must already be color-mapped and normalized to sRGB.
+        virtual void drawImage(const css::uno::Sequence<
+                                     css::beans::PropertyValue>& xBitmap ) override;
+        /** Given image must already be color-mapped and normalized to sRGB.
+
+            maskColors must contain two sequences of color components
+         */
+        virtual void drawColorMaskedImage(const css::uno::Sequence<
+                                                css::beans::PropertyValue>& xBitmap,
+                                          const css::uno::Sequence<
+                                                css::uno::Any>&             xMaskColors ) override;
+        virtual void drawMaskedImage(const css::uno::Sequence<
+                                           css::beans::PropertyValue>& xBitmap,
+                                     const css::uno::Sequence<
+                                           css::beans::PropertyValue>& xMask,
+                                     bool                              bInvertMask) override;
+        virtual void drawAlphaMaskedImage(const css::uno::Sequence<
+                                                css::beans::PropertyValue>& xImage,
+                                          const css::uno::Sequence<
+                                                css::beans::PropertyValue>& xMask) override;
+
+        void startIndicator( const OUString& rText );
+        void endIndicator();
+
+        void setupImage(ImageId nImage);
+
+        // Two-way lookup tables between ids and values, for fonts and
+        // for graphics contexts.
+        typedef std::unordered_map<sal_Int32,FontAttributes> IdToFontMap;
+        typedef std::unordered_map<FontAttributes,sal_Int32,FontAttrHash> FontToIdMap;
+
+        typedef std::unordered_map<sal_Int32,GraphicsContext> IdToGCMap;
+        typedef std::unordered_map<GraphicsContext, sal_Int32, GraphicsContextHash> GCToIdMap;
+
+        typedef std::vector<GraphicsContext> GraphicsContextStack;
+
+        // CharGlyph is only forward-declared at this point; its
+        // definition follows this class.
+        std::vector<CharGlyph>             m_GlyphsList;
+
+        std::shared_ptr<DocumentElement> m_pDocument;
+        PageElement*                       m_pCurPage;     // raw, non-owning pointer
+        Element*                           m_pCurElement;  // raw, non-owning pointer
+        sal_Int32                          m_nNextFontId;
+        IdToFontMap                        m_aIdToFont;
+        FontToIdMap                        m_aFontToId;
+
+        GraphicsContextStack               m_aGCStack;
+        sal_Int32                          m_nNextGCId;
+        IdToGCMap                          m_aIdToGC;
+        GCToIdMap                          m_aGCToId;
+
+        ImageContainer                     m_aImages;
+
+        sal_Int32                          m_nPages;
+        sal_Int32                          m_nNextZOrder;
+        css::uno::Reference< css::task::XStatusIndicator >
+                                           m_xStatusIndicator;
+    };
+    /// One glyph run together with the state it was drawn in.
+    /// Despite the m_r prefix, m_rCurrentContext and m_rGlyphs are
+    /// stored by value: the constructor copies both arguments.
+    class CharGlyph final
+    {
+        public:
+            CharGlyph(Element* pCurElement, const GraphicsContext& rCurrentContext,
+                double width, double prevSpaceWidth, const OUString& rGlyphs  )
+               : m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext),
+                 m_Width(width), m_PrevSpaceWidth(prevSpaceWidth), m_rGlyphs(rGlyphs) {};
+
+            // getGlyph() and getGC() return mutable references to the
+            // stored copies.
+            OUString& getGlyph(){ return m_rGlyphs; }
+            double getWidth() const { return m_Width; }
+            double getPrevSpaceWidth() const { return m_PrevSpaceWidth; }
+            GraphicsContext&  getGC(){ return m_rCurrentContext; }
+            Element*  getCurElement(){ return m_pCurElement; }
+
+        private:
+            Element*            m_pCurElement ;      // raw, non-owning pointer
+            GraphicsContext     m_rCurrentContext ;
+            double              m_Width ;
+            double              m_PrevSpaceWidth ;
+            OUString            m_rGlyphs ;
+    };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx b/sdext/source/pdfimport/inc/pdfparse.hxx
new file mode 100644
index 0000000000..542a9ed4b1
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfparse.hxx
@@ -0,0 +1,302 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX + +#include <sal/types.h> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> + +#include <string_view> +#include <unordered_map> +#include <utility> +#include <vector> +#include <memory> + +namespace pdfparse +{ + +struct EmitImplData; +struct PDFContainer; +class EmitContext +{ +public: + virtual bool write( const void* pBuf, unsigned int nLen ) = 0; + virtual unsigned int getCurPos() = 0; + virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0; + virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0; + + explicit EmitContext( const PDFContainer* pTop = nullptr ); + virtual ~EmitContext(); + + // set this to deflate contained streams + bool m_bDeflate; + // set this to decrypt the PDF file + bool m_bDecrypt; + +private: + friend struct PDFEntry; + std::unique_ptr<EmitImplData> m_pImplData; +}; + +struct PDFEntry +{ + PDFEntry() {} + virtual ~PDFEntry(); + + virtual bool emit( EmitContext& rWriteContext ) const = 0; + virtual PDFEntry* clone() const = 0; + +protected: + static EmitImplData* getEmitData( EmitContext const & rContext ); + static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ); +}; + +struct PDFComment final : public PDFEntry +{ + OString m_aComment; + + explicit PDFComment( OString aComment ) + : 
PDFEntry(), m_aComment(std::move( aComment )) {} + virtual ~PDFComment() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFValue : public PDFEntry +{ + // abstract base class for simple values + PDFValue() : PDFEntry() {} + virtual ~PDFValue() override; +}; + +struct PDFName final : public PDFValue +{ + OString m_aName; + + explicit PDFName( OString aName ) + : PDFValue(), m_aName(std::move( aName )) {} + virtual ~PDFName() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; + + OUString getFilteredName() const; +}; + +struct PDFString final : public PDFValue +{ + OString m_aString; + + explicit PDFString( OString aString ) + : PDFValue(), m_aString(std::move( aString )) {} + virtual ~PDFString() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; + + OString getFilteredString() const; +}; + +struct PDFNumber final : public PDFValue +{ + double m_fValue; + + explicit PDFNumber( double fVal ) + : PDFValue(), m_fValue( fVal ) {} + virtual ~PDFNumber() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFBool final : public PDFValue +{ + bool m_bValue; + + explicit PDFBool( bool bVal ) + : PDFValue(), m_bValue( bVal ) {} + virtual ~PDFBool() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFObjectRef final : public PDFValue +{ + unsigned int m_nNumber; + unsigned int m_nGeneration; + + PDFObjectRef( unsigned int nNr, unsigned int nGen ) + : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {} + virtual ~PDFObjectRef() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFNull final : public 
PDFValue +{ + PDFNull() {} + virtual ~PDFNull() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFObject; +struct PDFContainer : public PDFEntry +{ + sal_Int32 m_nOffset; + std::vector<std::unique_ptr<PDFEntry>> m_aSubElements; + + // this is an abstract base class for identifying + // entries that can contain sub elements besides comments + PDFContainer() : PDFEntry(), m_nOffset( 0 ) {} + virtual ~PDFContainer() override; + bool emitSubElements( EmitContext& rWriteContext ) const; + void cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const; + + PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const; + PDFObject* findObject( PDFObjectRef const * pRef ) const + { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); } +}; + +struct PDFArray final : public PDFContainer +{ + PDFArray() {} + virtual ~PDFArray() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFDict final : public PDFContainer +{ + typedef std::unordered_map<OString,PDFEntry*> Map; + Map m_aMap; + + PDFDict() {} + virtual ~PDFDict() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; + + // inserting a null value removes rName and its previous value + // from the dictionary + void insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue ); + // removes a name/value pair from the dict + void eraseValue( std::string_view rName ); + // builds a new map from the sub elements + // returns nullptr on success, otherwise the first offending element + PDFEntry* buildMap(); +}; + +struct PDFStream final : public PDFEntry +{ + unsigned int m_nBeginOffset; + unsigned int m_nEndOffset; // offset of the byte after the stream + PDFDict* m_pDict; + + PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* 
pStreamDict ) + : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {} + virtual ~PDFStream() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; + + unsigned int getDictLength( const PDFContainer* pObjectContainer ) const; // get contents of the "Length" entry of the dict +}; + +struct PDFTrailer final : public PDFContainer +{ + PDFDict* m_pDict; + + PDFTrailer() : PDFContainer(), m_pDict( nullptr ) {} + virtual ~PDFTrailer() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFFileImplData; +struct PDFFile final : public PDFContainer +{ +private: + mutable std::unique_ptr<PDFFileImplData> m_pData; + PDFFileImplData* impl_getData() const; +public: + unsigned int m_nMajor; // PDF major + unsigned int m_nMinor; // PDF minor + + PDFFile(); + virtual ~PDFFile() override; + + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; + + bool isEncrypted() const; + + bool usesSupportedEncryptionFormat() const; + + // this method checks whether rPwd is compatible with + // either user or owner password and sets up decrypt data in that case + // returns true if decryption can be done + bool setupDecryptionData( const OString& rPwd ) const; + + bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, + sal_uInt8* pOutBuffer, + unsigned int nObject, unsigned int nGeneration ) const; +}; + +struct PDFObject final : public PDFContainer +{ + PDFEntry* m_pObject; + PDFStream* m_pStream; + unsigned int m_nNumber; + unsigned int m_nGeneration; + + PDFObject( unsigned int nNr, unsigned int nGen ) + : m_pObject( nullptr ), m_pStream( nullptr ), m_nNumber( nNr ), m_nGeneration( nGen ) {} + virtual ~PDFObject() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; + + // writes only the 
contained stream, deflated if necessary + void writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const; + +private: + // returns true if stream is deflated + // fills rpStream and *pBytes with start of stream and count of bytes + // the buffer handed back through rpStream is owned by that std::unique_ptr<char[]>, so it must not be freed manually + // fills in NULL and 0 in case of error + bool getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const; +}; + +struct PDFPart final : public PDFContainer +{ + PDFPart() : PDFContainer() {} + virtual ~PDFPart() override; + virtual bool emit( EmitContext& rWriteContext ) const override; + virtual PDFEntry* clone() const override; +}; + +struct PDFReader +{ + PDFReader() = delete; + + static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName); +}; + +} // namespace + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/inc/saxemitter.hxx b/sdext/source/pdfimport/inc/saxemitter.hxx new file mode 100644 index 0000000000..a01b03c5de --- /dev/null +++ b/sdext/source/pdfimport/inc/saxemitter.hxx @@ -0,0 +1,36 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_SAXEMITTER_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_SAXEMITTER_HXX + +#include "xmlemitter.hxx" +#include <com/sun/star/uno/Reference.hxx> + +namespace com::sun::star::xml::sax { class XDocumentHandler; } + +namespace pdfi +{ + XmlEmitterSharedPtr createSaxEmitter( const css::uno::Reference< + css::xml::sax::XDocumentHandler >& xDocHdl ); +} + +#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_SAXEMITTER_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/inc/treevisiting.hxx b/sdext/source/pdfimport/inc/treevisiting.hxx new file mode 100644 index 0000000000..120166594a --- /dev/null +++ b/sdext/source/pdfimport/inc/treevisiting.hxx @@ -0,0 +1,62 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITING_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITING_HXX + +#include <sal/config.h> +#include <list> +#include <memory> + +namespace pdfi +{ + struct HyperlinkElement; + struct TextElement; + struct ParagraphElement; + struct FrameElement; + struct PolyPolyElement; + struct ImageElement; + struct PageElement; + struct DocumentElement; + struct Element; + + /** To be visited by all tree element types + + Visitor interface from the "visitor pattern". Implementor gets + called with actual tree node instances. + */ + struct ElementTreeVisitor + { + virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; + virtual ~ElementTreeVisitor() {} + }; + typedef std::shared_ptr<ElementTreeVisitor> ElementTreeVisitorSharedPtr; + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/inc/treevisitorfactory.hxx b/sdext/source/pdfimport/inc/treevisitorfactory.hxx new file mode 100644 index 0000000000..45621b318e --- /dev/null +++ b/sdext/source/pdfimport/inc/treevisitorfactory.hxx @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + 
* This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITORFACTORY_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITORFACTORY_HXX + +#include <memory> + +namespace pdfi +{ + struct ElementTreeVisitor; + struct EmitContext; + class PDFIProcessor; + class StyleContainer; + + /** Tree manipulation factory + + Creates visitor objects performing various operations on the + pdf parse tree + */ + struct TreeVisitorFactory + { + virtual ~TreeVisitorFactory() {} + + /// Create visitor that combines tree nodes + virtual std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor&) const = 0; + /// Create visitor that prepares style info + virtual std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor( + StyleContainer&, PDFIProcessor&) const = 0; + /// Create visitor that emits tree to an output target + virtual std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext&) const = 0; + }; + + typedef std::shared_ptr<TreeVisitorFactory> TreeVisitorFactorySharedPtr; + + TreeVisitorFactorySharedPtr createWriterTreeVisitorFactory(); + TreeVisitorFactorySharedPtr createImpressTreeVisitorFactory(); + TreeVisitorFactorySharedPtr 
createDrawTreeVisitorFactory(); +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/inc/wrapper.hxx b/sdext/source/pdfimport/inc/wrapper.hxx new file mode 100644 index 0000000000..a27e92e289 --- /dev/null +++ b/sdext/source/pdfimport/inc/wrapper.hxx @@ -0,0 +1,94 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX + +#include "contentsink.hxx" +#include <com/sun/star/uno/Reference.hxx> +#include <com/sun/star/task/XInteractionHandler.hpp> + +namespace com::sun::star { + namespace uno { + class XComponentContext; + } + namespace io { + class XInputStream; + } +} + +namespace pdfi +{ + bool xpdf_ImportFromFile( const OUString& rURL, + const ContentSinkSharedPtr& rSink, + const css::uno::Reference< + css::task::XInteractionHandler >& xIHdl, + const OUString& rPwd, + const css::uno::Reference< + css::uno::XComponentContext >& xContext, + const OUString& rFilterOptions); + bool xpdf_ImportFromStream( const css::uno::Reference< + css::io::XInputStream >& xInput, + const ContentSinkSharedPtr& rSink, + const css::uno::Reference< + css::task::XInteractionHandler >& xIHdl, + const OUString& rPwd, + const css::uno::Reference< + css::uno::XComponentContext >& xContext, + const OUString& rFilterOptions ); + + constexpr OUString fontAttributesSuffixes[] = { + // Note: for performance consideration, each one listed here is evaluated once, + // and they are checked from the suffix, thus the order matters. + // e.g. for "TimesNewRomanPS-BoldItalic", to get "TimesNewRoman", you should + // first have "Italic", and then "Bold", then "-", and then "PS". + u"-VKana"_ustr, + u"MT"_ustr, + u"PS"_ustr, + u"PSMT"_ustr, + u"Regular"_ustr, + u"Normal"_ustr, + u"Book"_ustr, + u"Medium"_ustr, + u"ExtraBold"_ustr, + u"UltraBold"_ustr, + u"ExtraLight"_ustr, + u"UltraLight"_ustr, + u"Bold"_ustr, + u"Heavy"_ustr, + u"Black"_ustr, + u"Italic"_ustr, + u"Oblique"_ustr, + u"Bold"_ustr, //BoldItalic, BoldOblique + u"Light"_ustr, + u"Thin"_ustr, + u"Semibold"_ustr, + u"-Roman"_ustr, + u"Reg"_ustr, + u"VKana"_ustr, + u"-"_ustr, + u","_ustr, + u";"_ustr, + u"PS"_ustr, // e.g. 
TimesNewRomanPS-BoldMT + }; +} + +#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/inc/xmlemitter.hxx b/sdext/source/pdfimport/inc/xmlemitter.hxx new file mode 100644 index 0000000000..9ffacda7f1 --- /dev/null +++ b/sdext/source/pdfimport/inc/xmlemitter.hxx @@ -0,0 +1,53 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_XMLEMITTER_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_XMLEMITTER_HXX + +#include "pdfihelper.hxx" +#include <memory> + +namespace pdfi +{ + /** Output interface to ODF + + Should be easy to implement using either SAX events or plain ODF + */ + class XmlEmitter + { + public: + virtual ~XmlEmitter() {} + + /** Open up a tag with the given properties + */ + virtual void beginTag( const char* pTag, const PropertyMap& rProperties ) = 0; + /** Write PCTEXT as-is to output + */ + virtual void write( const OUString& rString ) = 0; + /** Close previously opened tag + */ + virtual void endTag( const char* pTag ) = 0; + }; + + typedef std::shared_ptr<XmlEmitter> XmlEmitterSharedPtr; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |