summary refs log tree commit diff stats
path: root/sdext/source/pdfimport/inc
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
commit 267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sdext/source/pdfimport/inc
parent Initial commit. (diff)
download libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
Adding upstream version 4:24.2.0. upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sdext/source/pdfimport/inc')
-rw-r--r-- sdext/source/pdfimport/inc/contentsink.hxx 181
-rw-r--r-- sdext/source/pdfimport/inc/genericelements.hxx 319
-rw-r--r-- sdext/source/pdfimport/inc/imagecontainer.hxx 50
-rw-r--r-- sdext/source/pdfimport/inc/odfemitter.hxx 36
-rw-r--r-- sdext/source/pdfimport/inc/pdfihelper.hxx 234
-rw-r--r-- sdext/source/pdfimport/inc/pdfiprocessor.hxx 212
-rw-r--r-- sdext/source/pdfimport/inc/pdfparse.hxx 302
-rw-r--r-- sdext/source/pdfimport/inc/saxemitter.hxx 36
-rw-r--r-- sdext/source/pdfimport/inc/treevisiting.hxx 62
-rw-r--r-- sdext/source/pdfimport/inc/treevisitorfactory.hxx 59
-rw-r--r-- sdext/source/pdfimport/inc/wrapper.hxx 94
-rw-r--r-- sdext/source/pdfimport/inc/xmlemitter.hxx 53
12 files changed, 1638 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/inc/contentsink.hxx b/sdext/source/pdfimport/inc/contentsink.hxx
new file mode 100644
index 0000000000..44f609c3ca
--- /dev/null
+++ b/sdext/source/pdfimport/inc/contentsink.hxx
@@ -0,0 +1,181 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_CONTENTSINK_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_CONTENTSINK_HXX
+
+#include <com/sun/star/uno/Reference.hxx>
+#include <com/sun/star/uno/Sequence.hxx>
+#include <com/sun/star/rendering/ARGBColor.hpp>
+#include <memory>
+
+namespace com::sun::star {
+ namespace rendering
+ {
+ class XPolyPolygon2D;
+ }
+ namespace geometry
+ {
+ struct Matrix2D;
+ struct AffineMatrix2D;
+ struct RealRectangle2D;
+ struct RealPoint2D;
+ struct RealSize2D;
+ }
+ namespace beans
+ {
+ struct PropertyValue;
+ }
+}
+
+namespace pdfi
+{
+ struct FontAttributes
+ {
+     /// Build a font description from explicit values; isOutline always starts out false.
+     FontAttributes( OUString familyName_, OUString sFontWeight,
+                     bool isItalic_, bool isUnderline_,
+                     double size_, double ascent_ )
+         : familyName( std::move( familyName_ ) )
+         , fontWeight( std::move( sFontWeight ) )
+         , isItalic( isItalic_ )
+         , isUnderline( isUnderline_ )
+         , isOutline( false )
+         , size( size_ )
+         , ascent( ascent_ )
+     {
+     }
+
+     /// Default font: empty family, weight "normal", no decorations, size 0.0, ascent 1.0.
+     FontAttributes()
+         : familyName()
+         , fontWeight( u"normal"_ustr )
+         , isItalic( false )
+         , isUnderline( false )
+         , isOutline( false )
+         , size( 0.0 )
+         , ascent( 1.0 )
+     {
+     }
+
+     OUString familyName;
+     OUString fontWeight;
+     bool isItalic;
+     bool isUnderline;
+     bool isOutline;
+     double size; // device pixel
+     double ascent;
+
+     /// Member-wise equality over all seven fields.
+     bool operator==( const FontAttributes& rFont ) const
+     {
+         if( isItalic != rFont.isItalic || isUnderline != rFont.isUnderline || isOutline != rFont.isOutline )
+             return false;
+         if( size != rFont.size || ascent != rFont.ascent )
+             return false;
+         return familyName == rFont.familyName && fontWeight == rFont.fontWeight;
+     }
+ };
+
+ /** (preliminary) API wrapper around xpdf
+
+ Abstract interface: apart from the virtual destructor every member is pure
+ virtual. Wraps the functionality currently used from xpdf's OutputDev interface. Subject to change.
+ */
+ struct ContentSink
+ {
+ virtual ~ContentSink() {}
+
+ /// Total number of pages for upcoming document
+ virtual void setPageNum( sal_Int32 nNumPages ) = 0;
+ virtual void startPage( const css::geometry::RealSize2D& rSize ) = 0;
+ virtual void endPage() = 0;
+
+ virtual void hyperLink( const css::geometry::RealRectangle2D& rBounds,
+ const OUString& rURI ) = 0;
+
+ virtual void pushState() = 0; // NOTE(review): presumably paired with popState() to save/restore graphics state - confirm
+ virtual void popState() = 0;
+
+ virtual void setFlatness( double ) = 0;
+ virtual void setTransformation( const css::geometry::AffineMatrix2D& rMatrix ) = 0;
+ virtual void setLineDash( const css::uno::Sequence<double>& dashes,
+ double start ) = 0;
+ virtual void setLineJoin( sal_Int8 lineJoin ) = 0;
+ virtual void setLineCap( sal_Int8 lineCap ) = 0;
+ virtual void setMiterLimit(double) = 0;
+ virtual void setLineWidth(double) = 0;
+ virtual void setFillColor( const css::rendering::ARGBColor& rColor ) = 0;
+ virtual void setStrokeColor( const css::rendering::ARGBColor& rColor ) = 0;
+ virtual void setFont( const FontAttributes& rFont ) = 0;
+ virtual void setTextRenderMode( sal_Int32 ) = 0;
+
+
+ virtual void strokePath( const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath ) = 0;
+ virtual void fillPath( const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath ) = 0;
+ virtual void eoFillPath( const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath ) = 0; // NOTE(review): "eo" presumably means even-odd fill rule - confirm
+
+ virtual void intersectClip(const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath) = 0;
+ virtual void intersectEoClip(const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath) = 0;
+
+ virtual void drawGlyphs( const OUString& rGlyphs,
+ const css::geometry::RealRectangle2D& rRect,
+ const css::geometry::Matrix2D& rFontMatrix,
+ double fontSize) = 0;
+
+ /// issued when a sequence of associated glyphs is drawn
+ virtual void endText() = 0;
+
+ /// draws given bitmap as a mask (using current fill color)
+ virtual void drawMask(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap,
+ bool bInvert ) = 0;
+ /// Given image must already be color-mapped and normalized to sRGB.
+ virtual void drawImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap ) = 0;
+ /** Given image must already be color-mapped and normalized to sRGB.
+
+ xMaskColors must contain two sequences of color components
+ */
+ virtual void drawColorMaskedImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap,
+ const css::uno::Sequence<
+ css::uno::Any>& xMaskColors ) = 0;
+ virtual void drawMaskedImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap,
+ const css::uno::Sequence<
+ css::beans::PropertyValue>& xMask,
+ bool bInvertMask) = 0;
+ virtual void drawAlphaMaskedImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xImage,
+ const css::uno::Sequence<
+ css::beans::PropertyValue>& xMask) = 0;
+ };
+
+ using ContentSinkSharedPtr = std::shared_ptr<ContentSink>;
+}
+
+#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_CONTENTSINK_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/genericelements.hxx b/sdext/source/pdfimport/inc/genericelements.hxx
new file mode 100644
index 0000000000..6d1459a1f0
--- /dev/null
+++ b/sdext/source/pdfimport/inc/genericelements.hxx
@@ -0,0 +1,319 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_GENERICELEMENTS_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_GENERICELEMENTS_HXX
+
+#include "pdfihelper.hxx"
+#include "treevisiting.hxx"
+
+#include <com/sun/star/task/XStatusIndicator.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/i18n/BreakIterator.hpp>
+#include <basegfx/polygon/b2dpolypolygon.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
+
+#include <list>
+
+namespace pdfi
+{
+ class XmlEmitter;
+ class StyleContainer;
+ class ImageContainer;
+ class PDFIProcessor;
+ class ElementFactory;
+
+
+ struct EmitContext // aggregates emitter, style/image containers, processor, status indicator and component context
+ {
+ EmitContext(
+ XmlEmitter& _rEmitter,
+ StyleContainer& _rStyles,
+ ImageContainer& _rImages,
+ PDFIProcessor& _rProcessor,
+ const css::uno::Reference<
+ css::task::XStatusIndicator>& _xStatusIndicator,
+ css::uno::Reference< css::uno::XComponentContext > const & xContext)
+ :
+ rEmitter(_rEmitter),
+ rStyles(_rStyles),
+ rImages(_rImages),
+ rProcessor(_rProcessor),
+ xStatusIndicator(_xStatusIndicator),
+ m_xContext(xContext)
+ {}
+
+ XmlEmitter& rEmitter; // reference member: the referenced object must outlive this EmitContext
+ StyleContainer& rStyles; // reference member, see rEmitter
+ ImageContainer& rImages; // reference member, see rEmitter
+ PDFIProcessor& rProcessor; // reference member, see rEmitter
+ css::uno::Reference<
+ css::task::XStatusIndicator> xStatusIndicator; // copy of the UNO reference passed to the constructor
+ css::uno::Reference<
+ css::uno::XComponentContext > m_xContext; // copy of the UNO reference passed to the constructor
+ };
+
+ struct Element // abstract tree node (visitedBy is pure virtual); owns its children through unique_ptr
+ {
+ protected:
+ explicit Element( Element* pParent )
+ : x( 0 ), y( 0 ), w( 0 ), h( 0 ), StyleId( -1 ), Parent( pParent )
+ {
+ if( pParent )
+ pParent->Children.emplace_back( this ); // parent's Children list now holds a unique_ptr to this element
+ }
+
+ public:
+ virtual ~Element();
+
+ /**
+ To be implemented by every tree node that needs to be
+ visitable.
+ */
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) = 0;
+ /// Apply visitor to all children
+ void applyToChildren( ElementTreeVisitor& );
+ /// Union element geometry with given element
+ void updateGeometryWith( const Element* pMergeFrom );
+
+ /// To avoid some dynamic_cast cost: the base versions return nullptr, subclasses may override
+ virtual const TextElement* dynCastAsTextElement() const { return nullptr; }
+ virtual TextElement* dynCastAsTextElement() { return nullptr; }
+
+#if OSL_DEBUG_LEVEL > 0
+ // xxx refact TODO: move code to visitor
+ virtual void emitStructure( int nLevel );
+#endif
+ /** el must be a valid dereferenceable iterator of el->Parent->Children
+ pNewParent must not be NULL
+ */
+ static void setParent( std::list<std::unique_ptr<Element>>::iterator const & el, Element* pNewParent );
+
+ double x, y, w, h; // all 0 after construction
+ sal_Int32 StyleId; // -1 after construction
+ Element* Parent; // raw, non-owning back pointer; may be nullptr
+ std::list<std::unique_ptr<Element>> Children; // owned child elements
+ };
+
+ struct ListElement final : public Element
+ {
+ ListElement() : Element( nullptr ) {} // always constructed without a parent
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ struct HyperlinkElement final : public Element
+ {
+ friend class ElementFactory;
+ HyperlinkElement( Element* pParent, const OUString& rURI )
+ : Element( pParent ), URI( rURI ) {} // stores a copy of rURI
+ public: // no effect on access: struct members are public by default
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+
+ OUString URI;
+ };
+
+ struct GraphicalElement : public Element
+ {
+ protected:
+     /// Attach to pParent and store the graphics-context id; all other fields start at false / 0.
+     GraphicalElement( Element* pParent, sal_Int32 nGCId ) :
+         Element( pParent ),
+         GCId( nGCId ),
+         MirrorVertical( false ),
+         IsForText( false ),
+         FontSize( 0.0 ),
+         TextStyleId( 0 )
+     {}
+
+ public:
+     sal_Int32 GCId;
+     bool MirrorVertical;
+     bool IsForText;
+     double FontSize;
+     sal_Int32 TextStyleId;
+ };
+
+ struct DrawElement : public GraphicalElement
+ {
+ protected:
+ DrawElement( Element* pParent, sal_Int32 nGCId )
+ : GraphicalElement( pParent, nGCId ), isCharacter(false), ZOrder(0) {}
+
+ public:
+ bool isCharacter; // false after construction
+ sal_Int32 ZOrder; // 0 after construction
+ };
+
+ struct FrameElement final : public DrawElement
+ {
+ friend class ElementFactory;
+ FrameElement( Element* pParent, sal_Int32 nGCId )
+ : DrawElement( pParent, nGCId ) {} // adds no state of its own; only forwards to DrawElement
+
+ public:
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ struct TextElement final : public GraphicalElement
+ {
+ friend class ElementFactory;
+ TextElement( Element* pParent, sal_Int32 nGCId, sal_Int32 nFontId )
+ : GraphicalElement( pParent, nGCId ), FontId( nFontId ) {} // Text starts out empty
+
+ public:
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+
+ virtual const TextElement* dynCastAsTextElement() const override { return this; } // overrides the Element default, which returns nullptr
+ virtual TextElement* dynCastAsTextElement() override { return this; }
+
+ OUStringBuffer Text;
+ sal_Int32 FontId;
+ };
+
+ struct ParagraphElement final : public Element
+ {
+ friend class ElementFactory;
+ explicit ParagraphElement( Element* pParent ) : Element( pParent ), Type( Normal ), bRtl( false ) {} // defaults: Type = Normal, bRtl = false
+
+ public:
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) override;
+
+ // returns true only if only a single line is contained
+ bool isSingleLined( PDFIProcessor const & rProc ) const;
+ // returns the highest line height of the contained textelements
+ // line height is font height if the text element is itself multilined
+ double getLineHeight( PDFIProcessor& rProc ) const;
+ // returns the first text element child; does not recurse through subparagraphs
+ TextElement* getFirstTextChild() const;
+
+ enum ParagraphType { Normal, Headline };
+ ParagraphType Type;
+ bool bRtl; // NOTE(review): presumably "right-to-left" - confirm against the code that sets it
+ };
+
+ struct PolyPolyElement final : public DrawElement
+ {
+ friend class ElementFactory;
+ PolyPolyElement( Element* pParent, sal_Int32 nGCId,
+ const basegfx::B2DPolyPolygon& rPolyPoly,
+ sal_Int8 nAction ); // defined out of line
+ public:
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) override;
+
+ void updateGeometry();
+
+#if OSL_DEBUG_LEVEL > 0
+ virtual void emitStructure( int nLevel ) override;
+#endif
+
+ basegfx::B2DPolyPolygon PolyPoly;
+ sal_Int8 Action; // NOTE(review): presumably a combination of PolygonAction flags - confirm
+ };
+
+ struct ImageElement final : public DrawElement
+ {
+ friend class ElementFactory;
+ ImageElement( Element* pParent, sal_Int32 nGCId, ImageId nImage )
+ : DrawElement( pParent, nGCId ), Image( nImage ) {}
+
+ public:
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+
+ ImageId Image; // id handed to the constructor; the image data itself is not stored here
+ };
+
+ struct PageElement final : public Element
+ {
+ friend class ElementFactory;
+ PageElement( Element* pParent, sal_Int32 nPageNr )
+ : Element( pParent ), PageNumber( nPageNr ), Hyperlinks(),
+ TopMargin( 0.0 ), BottomMargin( 0.0 ), LeftMargin( 0.0 ), RightMargin( 0.0 )
+ {}
+ private:
+ // helper method for resolveHyperlinks
+ bool resolveHyperlink( const std::list<std::unique_ptr<Element>>::iterator& link_it, std::list<std::unique_ptr<Element>>& rElements );
+ public:
+ virtual ~PageElement() override;
+
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) override;
+
+ void resolveHyperlinks();
+ void resolveFontStyles( PDFIProcessor const & rProc );
+ void resolveUnderlines( PDFIProcessor const & rProc );
+
+ sal_Int32 PageNumber;
+ ListElement Hyperlinks; // contains not yet realized links on this page
+ double TopMargin; // all four margins are 0.0 after construction
+ double BottomMargin;
+ double LeftMargin;
+ double RightMargin;
+ std::unique_ptr<Element> HeaderElement; // owned; null until assigned
+ std::unique_ptr<Element> FooterElement; // owned; null until assigned
+ };
+
+ struct DocumentElement final : public Element
+ {
+ friend class ElementFactory;
+ public:
+ DocumentElement() : Element( nullptr ) {} // always constructed without a parent
+ virtual ~DocumentElement() override;
+
+ virtual void visitedBy( ElementTreeVisitor&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ // Static-only factory (the constructor is deleted) for the concrete Element types. NOTE(review): an earlier note here
+ // described per-document-type optimize() methods; nothing like that is visible in this header - confirm before relying on it.
+ class ElementFactory
+ {
+ public:
+ ElementFactory() = delete;
+
+ // The raw-pointer factories below use plain new. With a non-null pParent the Element base constructor
+ // appends the object to pParent->Children (a list of unique_ptr); with a null pParent the caller must manage the pointer.
+ static HyperlinkElement* createHyperlinkElement( Element* pParent, const OUString& rURI )
+ { return new HyperlinkElement( pParent, rURI ); }
+
+ static TextElement* createTextElement( Element* pParent, sal_Int32 nGCId, sal_Int32 nFontId )
+ { return new TextElement( pParent, nGCId, nFontId ); }
+ static ParagraphElement* createParagraphElement( Element* pParent )
+ { return new ParagraphElement( pParent ); }
+
+ static FrameElement* createFrameElement( Element* pParent, sal_Int32 nGCId )
+ { return new FrameElement( pParent, nGCId ); }
+ static PolyPolyElement*
+ createPolyPolyElement( Element* pParent,
+ sal_Int32 nGCId,
+ const basegfx::B2DPolyPolygon& rPolyPoly,
+ sal_Int8 nAction)
+ { return new PolyPolyElement( pParent, nGCId, rPolyPoly, nAction ); }
+ static ImageElement* createImageElement( Element* pParent, sal_Int32 nGCId, ImageId nImage )
+ { return new ImageElement( pParent, nGCId, nImage ); }
+
+ static PageElement* createPageElement( Element* pParent,
+ sal_Int32 nPageNr )
+ { return new PageElement( pParent, nPageNr ); }
+ static std::shared_ptr<DocumentElement> createDocumentElement() // the only factory that returns an owning smart pointer
+ { return std::make_shared<DocumentElement>(); }
+ };
+
+ bool isComplex(const css::uno::Reference<css::i18n::XBreakIterator>& rBreakIterator, TextElement* const pTextElem);
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/imagecontainer.hxx b/sdext/source/pdfimport/inc/imagecontainer.hxx
new file mode 100644
index 0000000000..d21ed7504f
--- /dev/null
+++ b/sdext/source/pdfimport/inc/imagecontainer.hxx
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_IMAGECONTAINER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_IMAGECONTAINER_HXX
+
+#include "pdfihelper.hxx"
+
+#include <com/sun/star/beans/PropertyValue.hpp>
+
+#include <vector>
+
+namespace pdfi
+{
+ struct EmitContext;
+
+ class ImageContainer
+ {
+ private:
+ std::vector< css::uno::Sequence<
+ css::beans::PropertyValue> > m_aImages; // one property sequence per stored image
+
+ public:
+ ImageContainer();
+
+ ImageId addImage( const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap ); // NOTE(review): returned id is presumably an index into m_aImages - confirm in the implementation
+ void writeBase64EncodedStream( ImageId nImageId, EmitContext& rContext );
+ };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/odfemitter.hxx b/sdext/source/pdfimport/inc/odfemitter.hxx
new file mode 100644
index 0000000000..97111c2991
--- /dev/null
+++ b/sdext/source/pdfimport/inc/odfemitter.hxx
@@ -0,0 +1,36 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_ODFEMITTER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_ODFEMITTER_HXX
+
+#include "xmlemitter.hxx"
+#include <com/sun/star/uno/Reference.hxx>
+
+namespace com::sun::star::io { class XOutputStream; }
+
+namespace pdfi
+{
+ XmlEmitterSharedPtr createOdfEmitter( const css::uno::Reference<
+ css::io::XOutputStream>& xOut );
+}
+
+#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_ODFEMITTER_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/pdfihelper.hxx b/sdext/source/pdfimport/inc/pdfihelper.hxx
new file mode 100644
index 0000000000..c6be1a6ba7
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfihelper.hxx
@@ -0,0 +1,234 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIHELPER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIHELPER_HXX
+
+#include "contentsink.hxx"
+
+#include <rtl/ustring.hxx>
+#include <rtl/math.h>
+#include <basegfx/matrix/b2dhommatrix.hxx>
+#include <basegfx/polygon/b2dpolypolygon.hxx>
+#include <basegfx/polygon/b2dpolygon.hxx>
+#include <com/sun/star/rendering/PathCapType.hpp>
+#include <com/sun/star/rendering/PathJoinType.hpp>
+
+#include <unordered_map>
+#include <vector>
+#include <o3tl/hash_combine.hxx>
+
+// virtual resolution of the PDF OutputDev in dpi
+#define PDFI_OUTDEV_RESOLUTION 7200
+
+namespace com::sun::star::task { class XInteractionHandler; }
+
+namespace pdfi
+{
+ using PropertyMap = std::unordered_map< OUString, OUString >;
+ using ImageId = sal_Int32;
+
+ /// What to do with a polygon. The values are distinct bits (1, 2, 4) and can be ORed together
+ enum PolygonAction { PATH_STROKE=1, PATH_FILL=2, PATH_EOFILL=4 };
+
+ OUString unitMMString( double fMM );
+ OUString convertPixelToUnitString( double fPix );
+
+ inline double convPx2mm( double fPix )
+ {
+     // 25.4 mm per inch divided by the output device's dots per inch
+     const double fMmPerPixel = 25.4/PDFI_OUTDEV_RESOLUTION;
+     return fPix * fMmPerPixel;
+ }
+
+ inline double convmm2Px( double fMM )
+ {
+     // the output device's dots per inch divided by 25.4 mm per inch
+     const double fPixelPerMm = PDFI_OUTDEV_RESOLUTION/25.4;
+     return fMM * fPixelPerMm;
+ }
+
+ /// convert pixel to mm with 2 decimal places, rounding DOWN (std::floor), not to nearest
+ inline double convPx2mmPrec2( double fPix )
+ {
+ constexpr double px2mm = 25.4/PDFI_OUTDEV_RESOLUTION;
+ double mm = fPix * ( px2mm * 100); // value in hundredths of a millimetre
+ return std::floor(mm) / 100;
+ }
+
+ /// Convert color to "#FEFEFE" color notation
+ OUString getColorString( const css::rendering::ARGBColor& );
+ OUString getPercentString(double value);
+
+ double GetAverageTransformationScale(const basegfx::B2DHomMatrix& matrix);
+ void FillDashStyleProps(PropertyMap& props, const std::vector<double>& dashArray, double scale);
+
+ struct FontAttrHash
+ {
+     size_t operator()(const FontAttributes& rFont ) const
+     {
+         std::size_t nHash = 0;
+         o3tl::hash_combine(nHash, rFont.familyName.hashCode());
+         o3tl::hash_combine(nHash, rFont.fontWeight);
+         o3tl::hash_combine(nHash, rFont.isItalic);
+         o3tl::hash_combine(nHash, rFont.isUnderline);
+         o3tl::hash_combine(nHash, rFont.isOutline);
+         o3tl::hash_combine(nHash, rFont.size); // ascent is deliberately left as in the original: not mixed in
+         return nHash;
+     }
+ };
+
+ /// Drawing-state value type: colours, line attributes, dash pattern, font id, transformation and clip.
+ struct GraphicsContext
+ {
+     // Members are initialised in declaration order; the constructor lists them in the same order.
+     css::rendering::ARGBColor LineColor;
+     css::rendering::ARGBColor FillColor;
+     sal_Int8 LineJoin;
+     sal_Int8 LineCap;
+     sal_Int8 BlendMode;
+     double Flatness;
+     double LineWidth;
+     double MiterLimit;
+     std::vector<double> DashArray;
+     sal_Int32 FontId;
+     sal_Int32 TextRenderMode;
+     basegfx::B2DHomMatrix Transformation;
+     basegfx::B2DPolyPolygon Clip;
+
+     /// Defaults: LineWidth 1.0, MiterLimit 10.0, every other member zero, empty or default-constructed.
+     GraphicsContext()
+         : LineColor()
+         , FillColor()
+         , LineJoin(0)
+         , LineCap(0)
+         , BlendMode(0)
+         , Flatness(0.0)
+         , LineWidth(1.0)
+         , MiterLimit(10.0)
+         , DashArray()
+         , FontId(0)
+         , TextRenderMode(0)
+         , Transformation()
+         , Clip()
+     {}
+
+     /// Member-wise equality over every field of the context.
+     bool operator==(const GraphicsContext& rRight ) const
+     {
+         const css::rendering::ARGBColor& rLine = rRight.LineColor;
+         const css::rendering::ARGBColor& rFill = rRight.FillColor;
+         if( LineColor.Red != rLine.Red || LineColor.Green != rLine.Green ||
+             LineColor.Blue != rLine.Blue || LineColor.Alpha != rLine.Alpha )
+             return false;
+         if( FillColor.Red != rFill.Red || FillColor.Green != rFill.Green ||
+             FillColor.Blue != rFill.Blue || FillColor.Alpha != rFill.Alpha )
+             return false;
+         if( LineJoin != rRight.LineJoin || LineCap != rRight.LineCap || BlendMode != rRight.BlendMode )
+             return false;
+         if( LineWidth != rRight.LineWidth || Flatness != rRight.Flatness || MiterLimit != rRight.MiterLimit )
+             return false;
+         if( FontId != rRight.FontId || TextRenderMode != rRight.TextRenderMode )
+             return false;
+         // dash pattern, matrix and clip polygon are compared last
+         return DashArray == rRight.DashArray &&
+                Transformation == rRight.Transformation &&
+                Clip == rRight.Clip;
+     }
+
+     /// Name of the current line join; MITER and any unrecognised value yield "miter".
+     OUString GetLineJoinString() const
+     {
+         if( LineJoin == css::rendering::PathJoinType::ROUND )
+             return "round";
+         if( LineJoin == css::rendering::PathJoinType::BEVEL )
+             return "bevel";
+         // PathJoinType::MITER and everything else
+         return "miter";
+     }
+
+     /// Name of the current line cap; BUTT and any unrecognised value yield "butt".
+     OUString GetLineCapString() const
+     {
+         if( LineCap == css::rendering::PathCapType::ROUND )
+             return "round";
+         if( LineCap == css::rendering::PathCapType::SQUARE )
+             return "square";
+         // PathCapType::BUTT and everything else
+         return "butt";
+     }
+
+     /// True when either off-diagonal entry (0,1) or (1,0) of Transformation is non-zero.
+     bool isRotatedOrSkewed() const
+     {
+         const bool bOffDiagonalZero = Transformation.get( 0, 1 ) == 0.0 && Transformation.get( 1, 0 ) == 0.0;
+         return !bOffDiagonalZero;
+     }
+ };
+
+ struct GraphicsContextHash
+ {
+     size_t operator()(const GraphicsContext& rGC ) const
+     {
+         std::size_t nHash = 0;
+         // stroke colour, then fill colour, channel by channel
+         o3tl::hash_combine(nHash, rGC.LineColor.Red);
+         o3tl::hash_combine(nHash, rGC.LineColor.Green);
+         o3tl::hash_combine(nHash, rGC.LineColor.Blue);
+         o3tl::hash_combine(nHash, rGC.LineColor.Alpha);
+         o3tl::hash_combine(nHash, rGC.FillColor.Red);
+         o3tl::hash_combine(nHash, rGC.FillColor.Green);
+         o3tl::hash_combine(nHash, rGC.FillColor.Blue);
+         o3tl::hash_combine(nHash, rGC.FillColor.Alpha);
+         // scalar line and text attributes
+         o3tl::hash_combine(nHash, rGC.LineJoin);
+         o3tl::hash_combine(nHash, rGC.LineCap);
+         o3tl::hash_combine(nHash, rGC.BlendMode);
+         o3tl::hash_combine(nHash, rGC.LineWidth);
+         o3tl::hash_combine(nHash, rGC.Flatness);
+         o3tl::hash_combine(nHash, rGC.MiterLimit);
+         o3tl::hash_combine(nHash, rGC.DashArray.size()); // only the length of the dash pattern is hashed
+         o3tl::hash_combine(nHash, rGC.FontId);
+         o3tl::hash_combine(nHash, rGC.TextRenderMode);
+         // matrix entries column by column: (0,0) (1,0) (0,1) (1,1) (0,2) (1,2)
+         for( sal_uInt16 nCol = 0; nCol < 3; ++nCol )
+             for( sal_uInt16 nRow = 0; nRow < 2; ++nRow )
+                 o3tl::hash_combine(nHash, rGC.Transformation.get( nRow, nCol ));
+         o3tl::hash_combine(nHash, rGC.Clip.count() ? rGC.Clip.getB2DPolygon(0).count() : 0);
+         return nHash;
+     }
+ };
+
+ /** retrieve password from user
+ */
+ bool getPassword( const css::uno::Reference<
+ css::task::XInteractionHandler >& xHandler,
+ OUString& rOutPwd,
+ bool bFirstTry,
+ const OUString& rDocName
+ );
+
+ void reportUnsupportedEncryptionFormat(
+ css::uno::Reference<
+ css::task::XInteractionHandler > const & handler);
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/pdfiprocessor.hxx b/sdext/source/pdfimport/inc/pdfiprocessor.hxx
new file mode 100644
index 0000000000..3fdc146716
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfiprocessor.hxx
@@ -0,0 +1,212 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIPROCESSOR_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFIPROCESSOR_HXX
+
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <com/sun/star/task/XStatusIndicator.hpp>
+#include <com/sun/star/geometry/RealSize2D.hpp>
+#include <com/sun/star/geometry/RealRectangle2D.hpp>
+#include <com/sun/star/geometry/Matrix2D.hpp>
+
+#include <basegfx/matrix/b2dhommatrix.hxx>
+
+#include <rtl/ustring.hxx>
+
+#include <memory>
+#include <unordered_map>
+
+#include "imagecontainer.hxx"
+#include "contentsink.hxx"
+#include "treevisitorfactory.hxx"
+#include "genericelements.hxx"
+
+namespace pdfi
+{
+
+ class PDFIProcessor;
+ struct Element;
+ struct DocumentElement;
+ struct PageElement;
+ class ElementFactory;
+ class XmlEmitter;
+ class CharGlyph;
+
+ /** Main entry from the parser
+
+ Creates the internal DOM tree from the render calls
+ */
+ class PDFIProcessor final : public ContentSink // ContentSink implementation; the interface overrides are declared private below
+ {
+ public:
+ css::uno::Reference<
+ css::uno::XComponentContext > m_xContext;
+ basegfx::B2DHomMatrix prevTextMatrix; // public data member; NOTE(review): presumably the text matrix of the previous glyph call - confirm
+ double prevCharWidth; // public data member; NOTE(review): presumably the width seen by the previous glyph call - confirm
+
+ explicit PDFIProcessor( const css::uno::Reference< css::task::XStatusIndicator >& xStat,
+ css::uno::Reference< css::uno::XComponentContext > const & xContext) ;
+
+ void emit( XmlEmitter& rEmitter,
+ const TreeVisitorFactory& rVisitorFactory );
+
+ sal_Int32 getGCId( const GraphicsContext& rGC ); // non-const: NOTE(review): presumably registers unseen contexts in m_aGCToId/m_aIdToGC - confirm
+ const GraphicsContext& getGraphicsContext( sal_Int32 nGCId ) const;
+ GraphicsContext& getCurrentContext() { return m_aGCStack.back(); } // top of the context stack; the stack must not be empty (std::vector::back)
+ const GraphicsContext& getCurrentContext() const { return m_aGCStack.back(); } // const overload, same precondition
+
+ const css::uno::Reference< css::task::XStatusIndicator >& getStatusIndicator() const
+ { return m_xStatusIndicator; }
+
+ const FontAttributes& getFont( sal_Int32 nFontId ) const;
+ sal_Int32 getFontId( const FontAttributes& rAttr ) const;
+
+ static void sortElements( Element* pElement );
+
+ static OUString SubstituteBidiMirrored(const OUString& rString);
+
+ private:
+ void processGlyphLine();
+
+ // ContentSink interface implementation
+
+ virtual void setPageNum( sal_Int32 nNumPages ) override;
+ virtual void startPage( const css::geometry::RealSize2D& rSize ) override;
+ virtual void endPage() override;
+
+ virtual void hyperLink( const css::geometry::RealRectangle2D& rBounds,
+ const OUString& rURI ) override;
+ virtual void pushState() override;
+ virtual void popState() override;
+ virtual void setFlatness( double ) override;
+ virtual void setTransformation( const css::geometry::AffineMatrix2D& rMatrix ) override;
+ virtual void setLineDash( const css::uno::Sequence<double>& dashes,
+ double start ) override;
+ virtual void setLineJoin(sal_Int8) override;
+ virtual void setLineCap(sal_Int8) override;
+ virtual void setMiterLimit(double) override;
+ virtual void setLineWidth(double) override;
+ virtual void setFillColor( const css::rendering::ARGBColor& rColor ) override;
+ virtual void setStrokeColor( const css::rendering::ARGBColor& rColor ) override;
+ virtual void setFont( const FontAttributes& rFont ) override;
+ virtual void setTextRenderMode( sal_Int32 ) override;
+
+ virtual void strokePath( const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath ) override;
+ virtual void fillPath( const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath ) override;
+ virtual void eoFillPath( const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath ) override;
+
+ virtual void intersectClip(const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath) override;
+ virtual void intersectEoClip(const css::uno::Reference<
+ css::rendering::XPolyPolygon2D >& rPath) override;
+
+ virtual void drawGlyphs( const OUString& rGlyphs,
+ const css::geometry::RealRectangle2D& rRect,
+ const css::geometry::Matrix2D& rFontMatrix,
+ double fontSize) override;
+ virtual void endText() override;
+
+ virtual void drawMask(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap,
+ bool bInvert ) override;
+ /// Given image must already be color-mapped and normalized to sRGB.
+ virtual void drawImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap ) override;
+ /** Given image must already be color-mapped and normalized to sRGB.
+
+ maskColors must contain two sequences of color components
+ */
+ virtual void drawColorMaskedImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap,
+ const css::uno::Sequence<
+ css::uno::Any>& xMaskColors ) override;
+ virtual void drawMaskedImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xBitmap,
+ const css::uno::Sequence<
+ css::beans::PropertyValue>& xMask,
+ bool bInvertMask) override;
+ virtual void drawAlphaMaskedImage(const css::uno::Sequence<
+ css::beans::PropertyValue>& xImage,
+ const css::uno::Sequence<
+ css::beans::PropertyValue>& xMask) override;
+
+ void startIndicator( const OUString& rText );
+ void endIndicator();
+
+ void setupImage(ImageId nImage);
+
+ typedef std::unordered_map<sal_Int32,FontAttributes> IdToFontMap; // id -> font attributes
+ typedef std::unordered_map<FontAttributes,sal_Int32,FontAttrHash> FontToIdMap; // font attributes -> id, hashed with FontAttrHash
+
+ typedef std::unordered_map<sal_Int32,GraphicsContext> IdToGCMap; // id -> graphics context
+ typedef std::unordered_map<GraphicsContext, sal_Int32, GraphicsContextHash> GCToIdMap; // graphics context -> id, hashed with GraphicsContextHash
+
+ typedef std::vector<GraphicsContext> GraphicsContextStack; // a vector used as a stack; back() is the current context
+
+ std::vector<CharGlyph> m_GlyphsList; // CharGlyph is only forward-declared here; it is defined after this class
+
+ std::shared_ptr<DocumentElement> m_pDocument;
+ PageElement* m_pCurPage; // raw pointer; NOTE(review): ownership is not visible in this header - confirm
+ Element* m_pCurElement; // raw pointer; NOTE(review): ownership is not visible in this header - confirm
+ sal_Int32 m_nNextFontId;
+ IdToFontMap m_aIdToFont;
+ FontToIdMap m_aFontToId;
+
+ GraphicsContextStack m_aGCStack;
+ sal_Int32 m_nNextGCId;
+ IdToGCMap m_aIdToGC;
+ GCToIdMap m_aGCToId;
+
+ ImageContainer m_aImages;
+
+ sal_Int32 m_nPages;
+ sal_Int32 m_nNextZOrder;
+ css::uno::Reference< css::task::XStatusIndicator >
+ m_xStatusIndicator; // returned by getStatusIndicator()
+ };
+ class CharGlyph final // holds an Element pointer plus copies of a GraphicsContext, two widths and a glyph string
+ {
+ public:
+ CharGlyph(Element* pCurElement, const GraphicsContext& rCurrentContext, double width, double prevSpaceWidth, const OUString& rGlyphs )
+ : m_pCurElement(pCurElement) // pointer stored as given
+ , m_rCurrentContext(rCurrentContext) // copied into the member despite the m_r prefix
+ , m_Width(width)
+ , m_PrevSpaceWidth(prevSpaceWidth)
+ , m_rGlyphs(rGlyphs) {} // string copied as well
+ OUString& getGlyph(){ return m_rGlyphs; } // mutable access to the stored string
+ double getWidth() const { return m_Width; }
+ double getPrevSpaceWidth() const { return m_PrevSpaceWidth; }
+ GraphicsContext& getGC(){ return m_rCurrentContext; } // mutable access to the stored copy
+ Element* getCurElement(){ return m_pCurElement; }
+ private:
+ Element* m_pCurElement ;
+ GraphicsContext m_rCurrentContext ;
+ double m_Width ;
+ double m_PrevSpaceWidth ;
+ OUString m_rGlyphs ;
+ };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx b/sdext/source/pdfimport/inc/pdfparse.hxx
new file mode 100644
index 0000000000..542a9ed4b1
--- /dev/null
+++ b/sdext/source/pdfimport/inc/pdfparse.hxx
@@ -0,0 +1,302 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_PDFPARSE_HXX
+
+#include <sal/types.h>
+#include <rtl/ustring.hxx>
+#include <rtl/string.hxx>
+
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <memory>
+
+namespace pdfparse
+{
+
+struct EmitImplData;
+struct PDFContainer;
+class EmitContext // abstract output context passed to PDFEntry::emit(); subclasses implement the four pure virtuals
+{
+public:
+ virtual bool write( const void* pBuf, unsigned int nLen ) = 0; // takes a buffer and its length
+ virtual unsigned int getCurPos() = 0;
+ virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
+ virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0; // NOTE(review): presumably returns the number of bytes read - confirm
+
+ explicit EmitContext( const PDFContainer* pTop = nullptr );
+ virtual ~EmitContext();
+
+ // set this to deflate contained streams
+ bool m_bDeflate;
+ // set this to decrypt the PDF file
+ bool m_bDecrypt;
+
+private:
+ friend struct PDFEntry; // presumably so PDFEntry::getEmitData()/setEmitData() can reach m_pImplData
+ std::unique_ptr<EmitImplData> m_pImplData; // EmitImplData is only forward-declared in this header
+};
+
+struct PDFEntry // abstract base of the entry types declared in this header
+{
+ PDFEntry() {}
+ virtual ~PDFEntry();
+
+ virtual bool emit( EmitContext& rWriteContext ) const = 0; // writes this entry to the given context; bool result
+ virtual PDFEntry* clone() const = 0; // returns a raw pointer; NOTE(review): presumably the caller owns the copy - confirm
+
+protected:
+ static EmitImplData* getEmitData( EmitContext const & rContext ); // accessor pair for the context's private data (PDFEntry is a friend of EmitContext)
+ static void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData );
+};
+
+struct PDFComment final : public PDFEntry // entry that carries a comment string
+{
+ OString m_aComment;
+
+ explicit PDFComment( OString aComment ) // by-value parameter moved into the member
+ : PDFEntry(), m_aComment(std::move( aComment )) {}
+ virtual ~PDFComment() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFValue : public PDFEntry // adds no members; emit() and clone() stay pure virtual here
+{
+ // abstract base class for simple values
+ PDFValue() : PDFEntry() {}
+ virtual ~PDFValue() override;
+};
+
+struct PDFName final : public PDFValue // value entry that carries a name string
+{
+ OString m_aName;
+
+ explicit PDFName( OString aName ) // by-value parameter moved into the member
+ : PDFValue(), m_aName(std::move( aName )) {}
+ virtual ~PDFName() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+
+ OUString getFilteredName() const; // returns an OUString; NOTE(review): presumably a decoded form of m_aName - confirm in the implementation
+};
+
+struct PDFString final : public PDFValue // value entry that carries a byte string
+{
+ OString m_aString;
+
+ explicit PDFString( OString aString ) // by-value parameter moved into the member
+ : PDFValue(), m_aString(std::move( aString )) {}
+ virtual ~PDFString() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+
+ OString getFilteredString() const; // returns an OString; NOTE(review): presumably a decoded form of m_aString - confirm in the implementation
+};
+
+struct PDFNumber final : public PDFValue // value entry that carries a double
+{
+ double m_fValue;
+
+ explicit PDFNumber( double fVal ) // stores fVal unchanged
+ : PDFValue(), m_fValue( fVal ) {}
+ virtual ~PDFNumber() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFBool final : public PDFValue // value entry that carries a bool
+{
+ bool m_bValue;
+
+ explicit PDFBool( bool bVal ) // stores bVal unchanged
+ : PDFValue(), m_bValue( bVal ) {}
+ virtual ~PDFBool() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFObjectRef final : public PDFValue // value entry holding an object number and generation; accepted by PDFContainer::findObject()
+{
+ unsigned int m_nNumber;
+ unsigned int m_nGeneration;
+
+ PDFObjectRef( unsigned int nNr, unsigned int nGen ) // stores both values unchanged
+ : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
+ virtual ~PDFObjectRef() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFNull final : public PDFValue // value entry without any data members
+{
+ PDFNull() {}
+ virtual ~PDFNull() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFObject;
+struct PDFContainer : public PDFEntry // owns its sub elements through unique_ptr
+{
+ sal_Int32 m_nOffset; // initialised to 0 by the constructor
+ std::vector<std::unique_ptr<PDFEntry>> m_aSubElements;
+
+ // this is an abstract base class for identifying
+ // entries that can contain sub elements besides comments
+ PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
+ virtual ~PDFContainer() override;
+ bool emitSubElements( EmitContext& rWriteContext ) const;
+ void cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const; // output goes into the caller-supplied vector
+
+ PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const; // NOTE(review): presumably returns nullptr when nothing matches - confirm
+ PDFObject* findObject( PDFObjectRef const * pRef ) const // pRef must not be null: it is dereferenced unconditionally
+ { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
+};
+
+struct PDFArray final : public PDFContainer // container entry that adds no data members of its own
+{
+ PDFArray() {}
+ virtual ~PDFArray() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFDict final : public PDFContainer // container entry with an additional name -> entry lookup map
+{
+ typedef std::unordered_map<OString,PDFEntry*> Map; // raw pointers; NOTE(review): presumably they point into m_aSubElements - confirm
+ Map m_aMap;
+
+ PDFDict() {}
+ virtual ~PDFDict() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+
+ // inserting a null value removes rName and its previous value
+ // from the dictionary; pValue is passed as an owning unique_ptr
+ void insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue );
+ // removes a name/value pair from the dict
+ void eraseValue( std::string_view rName );
+ // builds a new map from the sub elements
+ // returns NULL if successful, else the first offending element
+ PDFEntry* buildMap();
+};
+
+struct PDFStream final : public PDFEntry // entry described by two byte offsets and a dictionary pointer
+{
+ unsigned int m_nBeginOffset;
+ unsigned int m_nEndOffset; // offset of the byte after the stream
+ PDFDict* m_pDict; // raw pointer stored as passed in; NOTE(review): ownership is not visible here - confirm
+
+ PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
+ : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
+ virtual ~PDFStream() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+
+ unsigned int getDictLength( const PDFContainer* pObjectContainer ) const; // get contents of the "Length" entry of the dict
+};
+
+struct PDFTrailer final : public PDFContainer // container entry with an extra dictionary pointer
+{
+ PDFDict* m_pDict; // raw pointer, nullptr after construction
+
+ PDFTrailer() : PDFContainer(), m_pDict( nullptr ) {}
+ virtual ~PDFTrailer() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFFileImplData;
+struct PDFFile final : public PDFContainer // container entry with version numbers and encryption-related queries
+{
+private:
+ mutable std::unique_ptr<PDFFileImplData> m_pData; // mutable, so const member functions may assign it; PDFFileImplData is only forward-declared here
+ PDFFileImplData* impl_getData() const; // NOTE(review): presumably creates m_pData on first use - confirm
+public:
+ unsigned int m_nMajor; // PDF major
+ unsigned int m_nMinor; // PDF minor
+
+ PDFFile();
+ virtual ~PDFFile() override;
+
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+
+ bool isEncrypted() const;
+
+ bool usesSupportedEncryptionFormat() const;
+
+ // this method checks whether rPwd is compatible with
+ // either user or owner password and sets up decrypt data in that case
+ // returns true if decryption can be done
+ bool setupDecryptionData( const OString& rPwd ) const;
+
+ bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, // input buffer and length; NOTE(review): pOutBuffer presumably needs room for nLen bytes - confirm
+ sal_uInt8* pOutBuffer,
+ unsigned int nObject, unsigned int nGeneration ) const;
+};
+
+struct PDFObject final : public PDFContainer // container entry identified by object number and generation
+{
+ PDFEntry* m_pObject; // raw pointer, nullptr after construction
+ PDFStream* m_pStream; // raw pointer, nullptr after construction
+ unsigned int m_nNumber;
+ unsigned int m_nGeneration;
+
+ PDFObject( unsigned int nNr, unsigned int nGen )
+ : m_pObject( nullptr ), m_pStream( nullptr ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
+ virtual ~PDFObject() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+
+ // writes only the contained stream, deflated if necessary
+ void writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
+
+private:
+ // returns true if stream is deflated
+ // fills rpStream and *pBytes with start of stream and count of bytes
+ // rpStream is a std::unique_ptr<char[]>, so the buffer needs no manual std::free
+ // NOTE(review): the earlier comment promised NULL and 0 on error - confirm in the implementation
+ bool getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
+};
+
+struct PDFPart final : public PDFContainer // container entry that adds no data members of its own
+{
+ PDFPart() : PDFContainer() {}
+ virtual ~PDFPart() override;
+ virtual bool emit( EmitContext& rWriteContext ) const override;
+ virtual PDFEntry* clone() const override;
+};
+
+struct PDFReader // static-only entry point: the constructor is deleted
+{
+ PDFReader() = delete;
+
+ static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName); // returns an owning pointer; NOTE(review): presumably null on failure - confirm
+};
+
+} // namespace
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/saxemitter.hxx b/sdext/source/pdfimport/inc/saxemitter.hxx
new file mode 100644
index 0000000000..a01b03c5de
--- /dev/null
+++ b/sdext/source/pdfimport/inc/saxemitter.hxx
@@ -0,0 +1,36 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_SAXEMITTER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_SAXEMITTER_HXX
+
+#include "xmlemitter.hxx"
+#include <com/sun/star/uno/Reference.hxx>
+
+namespace com::sun::star::xml::sax { class XDocumentHandler; }
+
+namespace pdfi
+{
+ XmlEmitterSharedPtr createSaxEmitter( const css::uno::Reference< // returns a shared XmlEmitter built for the given document handler
+ css::xml::sax::XDocumentHandler >& xDocHdl );
+}
+
+#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_SAXEMITTER_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/treevisiting.hxx b/sdext/source/pdfimport/inc/treevisiting.hxx
new file mode 100644
index 0000000000..120166594a
--- /dev/null
+++ b/sdext/source/pdfimport/inc/treevisiting.hxx
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITING_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITING_HXX
+
+#include <sal/config.h>
+#include <list>
+#include <memory>
+
+namespace pdfi
+{
+ struct HyperlinkElement;
+ struct TextElement;
+ struct ParagraphElement;
+ struct FrameElement;
+ struct PolyPolyElement;
+ struct ImageElement;
+ struct PageElement;
+ struct DocumentElement;
+ struct Element;
+
+ /** To be visited by all tree element types
+
+ Visitor interface from the "visitor pattern". Implementor gets
+ called with actual tree node instances.
+ */
+ struct ElementTreeVisitor // one pure-virtual visit() overload per element type
+ {
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0; // 2nd argument: iterator into a list of owned Elements
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) = 0;
+ virtual ~ElementTreeVisitor() {} // virtual, so implementations can be destroyed through this interface
+ };
+ typedef std::shared_ptr<ElementTreeVisitor> ElementTreeVisitorSharedPtr;
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/treevisitorfactory.hxx b/sdext/source/pdfimport/inc/treevisitorfactory.hxx
new file mode 100644
index 0000000000..45621b318e
--- /dev/null
+++ b/sdext/source/pdfimport/inc/treevisitorfactory.hxx
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITORFACTORY_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_TREEVISITORFACTORY_HXX
+
+#include <memory>
+
+namespace pdfi
+{
+ struct ElementTreeVisitor;
+ struct EmitContext;
+ class PDFIProcessor;
+ class StyleContainer;
+
+ /** Tree manipulation factory
+
+ Creates visitor objects performing various operations on the
+ pdf parse tree
+ */
+ struct TreeVisitorFactory // abstract factory: three pure-virtual creators, each returning a shared ElementTreeVisitor
+ {
+ virtual ~TreeVisitorFactory() {}
+
+ /// Create visitor that combines tree nodes
+ virtual std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor&) const = 0;
+ /// Create visitor that prepares style info
+ virtual std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor(
+ StyleContainer&, PDFIProcessor&) const = 0;
+ /// Create visitor that emits tree to an output target
+ virtual std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext&) const = 0;
+ };
+
+ typedef std::shared_ptr<TreeVisitorFactory> TreeVisitorFactorySharedPtr;
+
+ TreeVisitorFactorySharedPtr createWriterTreeVisitorFactory(); // NOTE(review): judging by the names, one factory per target application - confirm
+ TreeVisitorFactorySharedPtr createImpressTreeVisitorFactory(); // returns a shared TreeVisitorFactory
+ TreeVisitorFactorySharedPtr createDrawTreeVisitorFactory(); // returns a shared TreeVisitorFactory
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/wrapper.hxx b/sdext/source/pdfimport/inc/wrapper.hxx
new file mode 100644
index 0000000000..a27e92e289
--- /dev/null
+++ b/sdext/source/pdfimport/inc/wrapper.hxx
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX
+
+#include "contentsink.hxx"
+#include <com/sun/star/uno/Reference.hxx>
+#include <com/sun/star/task/XInteractionHandler.hpp>
+
+namespace com::sun::star {
+ namespace uno {
+ class XComponentContext;
+ }
+ namespace io {
+ class XInputStream;
+ }
+}
+
+namespace pdfi
+{
+ bool xpdf_ImportFromFile( const OUString& rURL, // file variant; NOTE(review): presumably returns true on success - confirm
+ const ContentSinkSharedPtr& rSink, // shared ContentSink passed by const reference
+ const css::uno::Reference<
+ css::task::XInteractionHandler >& xIHdl,
+ const OUString& rPwd,
+ const css::uno::Reference<
+ css::uno::XComponentContext >& xContext,
+ const OUString& rFilterOptions);
+ bool xpdf_ImportFromStream( const css::uno::Reference< // stream variant: same parameters as xpdf_ImportFromFile except for the first
+ css::io::XInputStream >& xInput,
+ const ContentSinkSharedPtr& rSink, // shared ContentSink passed by const reference
+ const css::uno::Reference<
+ css::task::XInteractionHandler >& xIHdl,
+ const OUString& rPwd,
+ const css::uno::Reference<
+ css::uno::XComponentContext >& xContext,
+ const OUString& rFilterOptions );
+
+ constexpr OUString fontAttributesSuffixes[] = {
+ // Note: for performance reasons, each entry listed here is evaluated only once,
+ // and entries are matched against the end of the name, so their order matters.
+ // E.g. to get "TimesNewRoman" from "TimesNewRomanPS-BoldItalic", the list must
+ // have "Italic" first, then "Bold", then "-", and then "PS".
+ u"-VKana"_ustr,
+ u"MT"_ustr,
+ u"PS"_ustr,
+ u"PSMT"_ustr,
+ u"Regular"_ustr,
+ u"Normal"_ustr,
+ u"Book"_ustr,
+ u"Medium"_ustr,
+ u"ExtraBold"_ustr,
+ u"UltraBold"_ustr,
+ u"ExtraLight"_ustr,
+ u"UltraLight"_ustr,
+ u"Bold"_ustr,
+ u"Heavy"_ustr,
+ u"Black"_ustr,
+ u"Italic"_ustr,
+ u"Oblique"_ustr,
+ u"Bold"_ustr, //BoldItalic, BoldOblique
+ u"Light"_ustr,
+ u"Thin"_ustr,
+ u"Semibold"_ustr,
+ u"-Roman"_ustr,
+ u"Reg"_ustr,
+ u"VKana"_ustr,
+ u"-"_ustr,
+ u","_ustr,
+ u";"_ustr,
+ u"PS"_ustr, // e.g. TimesNewRomanPS-BoldMT
+ };
+}
+
+#endif // INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_WRAPPER_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/inc/xmlemitter.hxx b/sdext/source/pdfimport/inc/xmlemitter.hxx
new file mode 100644
index 0000000000..9ffacda7f1
--- /dev/null
+++ b/sdext/source/pdfimport/inc/xmlemitter.hxx
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_XMLEMITTER_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_INC_XMLEMITTER_HXX
+
+#include "pdfihelper.hxx"
+#include <memory>
+
+namespace pdfi
+{
+ /** Output interface to ODF
+
+ Should be easy to implement using either SAX events or plain ODF
+ */
+ class XmlEmitter // abstract interface with three pure virtuals: beginTag(), write() and endTag()
+ {
+ public:
+ virtual ~XmlEmitter() {}
+
+ /** Open up a tag with the given properties
+ */
+ virtual void beginTag( const char* pTag, const PropertyMap& rProperties ) = 0;
+ /** Write PCTEXT as-is to output
+ */
+ virtual void write( const OUString& rString ) = 0;
+ /** Close previously opened tag
+ */
+ virtual void endTag( const char* pTag ) = 0; // NOTE(review): presumably pTag must match the preceding beginTag() - confirm
+ };
+
+ typedef std::shared_ptr<XmlEmitter> XmlEmitterSharedPtr;
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */