diff options
Diffstat (limited to 'include/vcl/filter')
-rw-r--r-- | include/vcl/filter/PDFiumLibrary.hxx | 236 | ||||
-rw-r--r-- | include/vcl/filter/PngImageReader.hxx | 56 | ||||
-rw-r--r-- | include/vcl/filter/SvmReader.hxx | 94 | ||||
-rw-r--r-- | include/vcl/filter/SvmWriter.hxx | 94 | ||||
-rw-r--r-- | include/vcl/filter/pdfdocument.hxx | 645 | ||||
-rw-r--r-- | include/vcl/filter/pdfobjectcontainer.hxx | 40 |
6 files changed, 1165 insertions, 0 deletions
diff --git a/include/vcl/filter/PDFiumLibrary.hxx b/include/vcl/filter/PDFiumLibrary.hxx
new file mode 100644
index 000000000..3a4d0d83f
--- /dev/null
+++ b/include/vcl/filter/PDFiumLibrary.hxx
@@ -0,0 +1,236 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#pragma once
+
+#include <memory>
+
+#include <com/sun/star/util/DateTime.hpp>
+
+#include <vcl/dllapi.h>
+#include <basegfx/vector/b2dsize.hxx>
+#include <basegfx/range/b2drectangle.hxx>
+#include <basegfx/point/b2dpoint.hxx>
+#include <basegfx/matrix/b2dhommatrix.hxx>
+#include <rtl/ustring.hxx>
+#include <tools/color.hxx>
+#include <tools/gen.hxx>
+#include <vcl/checksum.hxx>
+#include <vcl/Scanline.hxx>
+#include <vcl/pdf/PDFAnnotationSubType.hxx>
+#include <vcl/pdf/PDFPageObjectType.hxx>
+#include <vcl/pdf/PDFSegmentType.hxx>
+#include <vcl/pdf/PDFBitmapType.hxx>
+#include <vcl/pdf/PDFObjectType.hxx>
+#include <vcl/pdf/PDFTextRenderMode.hxx>
+#include <vcl/pdf/PDFFillMode.hxx>
+#include <vcl/pdf/PDFFindFlags.hxx>
+#include <vcl/pdf/PDFErrorType.hxx>
+
+class SvMemoryStream;
+
+namespace vcl::pdf
+{
+// Dictionary key names. Presumably meant to be passed as rKey to
+// PDFiumAnnotation::hasKey() / getValueType() / getString() / getLinked() --
+// confirm at the call sites.
+inline constexpr OStringLiteral constDictionaryKeyTitle = "T";
+inline constexpr OStringLiteral constDictionaryKeyContents = "Contents";
+inline constexpr OStringLiteral constDictionaryKeyPopup = "Popup";
+inline constexpr OStringLiteral constDictionaryKeyModificationDate = "M";
+inline constexpr OStringLiteral constDictionaryKeyInteriorColor = "IC";
+
+class PDFiumBitmap;
+class PDFiumDocument;
+class PDFiumPageObject;
+
+/// Abstract entry point of the wrapper: opens documents and creates bitmaps.
+/// Every member is pure virtual; the implementation is not part of this
+/// header. An instance is handed out by PDFiumLibrary::get().
+class VCL_DLLPUBLIC PDFium
+{
+public:
+    virtual ~PDFium() = default;
+
+    virtual const OUString& getLastError() const = 0;
+
+    /// Opens a document from the nSize bytes at pData.
+    /// NOTE(review): whether pData has to outlive the returned document is
+    /// not visible from this header -- confirm in the implementation.
+    virtual std::unique_ptr<PDFiumDocument> openDocument(const void* pData, int nSize,
+                                                         const OString& rPassword)
+        = 0;
+    virtual PDFErrorType getLastErrorCode() = 0;
+    virtual std::unique_ptr<PDFiumBitmap> createBitmap(int nWidth, int nHeight, int nAlpha) = 0;
+};
+
+class PDFiumPage;
+
+/// Abstract bitmap that can be filled and rendered into; all members are
+/// pure virtual.
+class VCL_DLLPUBLIC PDFiumBitmap
+{
+public:
+    virtual ~PDFiumBitmap() = default;
+    virtual void fillRect(int left, int top, int width, int height, sal_uInt32 nColor) = 0;
+    virtual void renderPageBitmap(PDFiumDocument* pDoc, PDFiumPage* pPage, int nStartX, int nStartY,
+                                  int nSizeX, int nSizeY)
+        = 0;
+    virtual ConstScanline getBuffer() = 0;
+    virtual int getStride() = 0;
+    virtual int getWidth() = 0;
+    virtual int getHeight() = 0;
+    virtual PDFBitmapType getFormat() = 0;
+};
+
+/// Abstract view of one annotation; all members are pure virtual.
+/// The key-based accessors take the dictionary key as an OString.
+class VCL_DLLPUBLIC PDFiumAnnotation
+{
+public:
+    virtual ~PDFiumAnnotation() = default;
+    virtual PDFAnnotationSubType getSubType() = 0;
+    virtual basegfx::B2DRectangle getRectangle() = 0;
+    virtual bool hasKey(OString const& rKey) = 0;
+    virtual PDFObjectType getValueType(OString const& rKey) = 0;
+    virtual OUString getString(OString const& rKey) = 0;
+    virtual std::unique_ptr<PDFiumAnnotation> getLinked(OString const& rKey) = 0;
+    virtual int getObjectCount() = 0;
+    virtual std::unique_ptr<PDFiumPageObject> getObject(int nIndex) = 0;
+    virtual std::vector<std::vector<basegfx::B2DPoint>> getInkStrokes() = 0;
+    virtual std::vector<basegfx::B2DPoint> getVertices() = 0;
+    virtual Color getColor() = 0;
+    virtual Color getInteriorColor() = 0;
+    virtual float getBorderWidth() = 0;
+    virtual basegfx::B2DSize getBorderCornerRadius() = 0;
+    virtual size_t getAttachmentPointsCount() = 0;
+    virtual std::vector<basegfx::B2DPoint> getAttachmentPoints(size_t nIndex) = 0;
+    virtual std::vector<basegfx::B2DPoint> getLineGeometry() = 0;
+};
+
+class PDFiumTextPage;
+
+/// Abstract view of one segment of a path, as returned by
+/// PDFiumPageObject::getPathSegment().
+class VCL_DLLPUBLIC PDFiumPathSegment
+{
+public:
+    virtual ~PDFiumPathSegment() = default;
+    virtual basegfx::B2DPoint getPoint() const = 0;
+    virtual bool isClosed() const = 0;
+    virtual PDFSegmentType getType() const = 0;
+};
+
+/// Abstract view of one page object; all members are pure virtual.
+/// NOTE(review): which getters are meaningful for which getType() value is
+/// not stated in this header -- confirm against the implementation.
+class VCL_DLLPUBLIC PDFiumPageObject
+{
+public:
+    virtual ~PDFiumPageObject() = default;
+
+    virtual PDFPageObjectType getType() = 0;
+    virtual OUString getText(std::unique_ptr<PDFiumTextPage> const& pTextPage) = 0;
+
+    virtual int getFormObjectCount() = 0;
+    virtual std::unique_ptr<PDFiumPageObject> getFormObject(int nIndex) = 0;
+
+    virtual basegfx::B2DHomMatrix getMatrix() = 0;
+    virtual basegfx::B2DRectangle getBounds() = 0;
+    virtual double getFontSize() = 0;
+    virtual OUString getFontName() = 0;
+    virtual PDFTextRenderMode getTextRenderMode() = 0;
+    virtual Color getFillColor() = 0;
+    virtual Color getStrokeColor() = 0;
+    virtual double getStrokeWidth() = 0;
+    // Path
+    virtual int getPathSegmentCount() = 0;
+    virtual std::unique_ptr<PDFiumPathSegment> getPathSegment(int index) = 0;
+    virtual Size getImageSize(PDFiumPage& rPage) = 0;
+    virtual std::unique_ptr<PDFiumBitmap> getImageBitmap() = 0;
+    /// Reports the draw mode through the two out-parameters; the meaning of
+    /// the bool result is not visible here (presumably success -- confirm).
+    virtual bool getDrawMode(PDFFillMode& eFillMode, bool& bStroke) = 0;
+};
+
+/// Abstract handle of a running text search, as returned by
+/// PDFiumTextPage::findStart().
+class VCL_DLLPUBLIC PDFiumSearchHandle
+{
+public:
+    virtual ~PDFiumSearchHandle() = default;
+
+    virtual bool findNext() = 0;
+    virtual bool findPrev() = 0;
+    virtual int getSearchResultIndex() = 0;
+    virtual int getSearchCount() = 0;
+};
+
+/// Abstract view of the text of a page, as returned by
+/// PDFiumPage::getTextPage().
+class VCL_DLLPUBLIC PDFiumTextPage
+{
+public:
+    virtual ~PDFiumTextPage() = default;
+
+    virtual int countChars() = 0;
+    virtual unsigned int getUnicode(int index) = 0;
+    virtual std::unique_ptr<PDFiumSearchHandle>
+    findStart(const OUString& rFindWhat, PDFFindFlags nFlags, sal_Int32 nStartIndex) = 0;
+
+    /// Returned rect is no longer upside down and is in mm100.
+    virtual basegfx::B2DRectangle getCharBox(int nIndex, double fPageHeight) = 0;
+};
+
+/// Abstract view of one page, as returned by PDFiumDocument::openPage().
+class VCL_DLLPUBLIC PDFiumPage
+{
+public:
+    virtual ~PDFiumPage() = default;
+
+    virtual int getObjectCount() = 0;
+    virtual std::unique_ptr<PDFiumPageObject> getObject(int nIndex) = 0;
+
+    virtual int getAnnotationCount() = 0;
+    virtual int getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation) = 0;
+
+    virtual std::unique_ptr<PDFiumAnnotation> getAnnotation(int nIndex) = 0;
+
+    virtual std::unique_ptr<PDFiumTextPage> getTextPage() = 0;
+
+    /// Get bitmap checksum of the page, without annotations/commenting.
+    virtual BitmapChecksum getChecksum(int nMDPPerm) = 0;
+
+    virtual double getWidth() = 0;
+    virtual double getHeight() = 0;
+
+    virtual bool hasTransparency() = 0;
+
+    virtual bool hasLinks() = 0;
+};
+
+/// Represents one digital signature, as exposed by PDFium.
+class VCL_DLLPUBLIC PDFiumSignature
+{
+public:
+    virtual ~PDFiumSignature() = default;
+
+    virtual std::vector<int> getByteRange() = 0;
+    virtual int getDocMDPPermission() = 0;
+    virtual std::vector<unsigned char> getContents() = 0;
+    virtual OString getSubFilter() = 0;
+    virtual OUString getReason() = 0;
+    virtual css::util::DateTime getTime() = 0;
+};
+
+/// Abstract view of an opened document, as returned by
+/// PDFium::openDocument(); gives access to pages and signatures by index.
+class VCL_DLLPUBLIC PDFiumDocument
+{
+public:
+    virtual ~PDFiumDocument() = default;
+
+    // Page size in points
+    virtual basegfx::B2DSize getPageSize(int nIndex) = 0;
+    virtual int getPageCount() = 0;
+    virtual int getSignatureCount() = 0;
+    virtual int getFileVersion() = 0;
+    virtual bool saveWithVersion(SvMemoryStream& rStream, int nFileVersion) = 0;
+
+    virtual std::unique_ptr<PDFiumPage> openPage(int nIndex) = 0;
+    virtual std::unique_ptr<PDFiumSignature> getSignature(int nIndex) = 0;
+    virtual std::vector<unsigned int> getTrailerEnds() = 0;
+};
+
+/// Accessor for the PDFium instance. get() returns a reference to a
+/// shared_ptr, so callers see the same pointer object rather than a copy.
+struct VCL_DLLPUBLIC PDFiumLibrary final
+{
+    static std::shared_ptr<PDFium>& get();
+};
+
+// Tools
+
+VCL_DLLPUBLIC OUString convertPdfDateToISO8601(OUString const& rInput);
+
+} // namespace vcl::pdf
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/vcl/filter/PngImageReader.hxx b/include/vcl/filter/PngImageReader.hxx
new file mode 100644
index 000000000..bbb5b7c8d
--- /dev/null
+++ b/include/vcl/filter/PngImageReader.hxx
@@ -0,0 +1,56 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_VCL_FILTER_PNGIMAGEREADER_HXX
+#define INCLUDED_VCL_FILTER_PNGIMAGEREADER_HXX
+
+#include <sal/config.h>
+
+#include <memory>
+
+#include <vcl/dllapi.h>
+
+#include <com/sun/star/uno/Reference.hxx>
+
+namespace com::sun::star::task
+{
+class XStatusIndicator;
+}
+
+class BitmapEx;
+class SvStream;
+
+namespace vcl
+{
+// Reads a PNG image from an SvStream into a BitmapEx.
+// The stream is held by reference, so it has to stay alive for as long as
+// the reader is used.
+class VCL_DLLPUBLIC PngImageReader
+{
+    SvStream& mrStream;
+    css::uno::Reference<css::task::XStatusIndicator> mxStatusIndicator;
+
+public:
+    // NOTE(review): single-argument constructor is not 'explicit', so an
+    // SvStream& converts implicitly to a PngImageReader.
+    PngImageReader(SvStream& rStream);
+
+    // Returns true if image was successfully read without errors.
+    // A usable bitmap may be returned even if there were errors (e.g. incomplete image).
+    bool read(BitmapEx& rBitmap);
+    // Returns a bitmap without indicating if there were errors.
+    BitmapEx read();
+
+    // Returns the contents of the msOG chunk (containing a Gif image), if it exists.
+    // Does not change position in the stream.
+    // chunkSize is an optional out-parameter (defaults to nullptr).
+    static std::unique_ptr<sal_uInt8[]> getMicrosoftGifChunk(SvStream& rStream,
+                                                             sal_Int32* chunkSize = nullptr);
+};
+
+} // namespace vcl
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/vcl/filter/SvmReader.hxx b/include/vcl/filter/SvmReader.hxx
new file mode 100644
index 000000000..d16a775e3
--- /dev/null
+++ b/include/vcl/filter/SvmReader.hxx
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#pragma once
+
+#include <vcl/gdimtf.hxx>
+
+class SvStream;
+
+// Reads a GDIMetaFile from an SvStream (see Read()).
+// The stream is held by reference, so it has to outlive the reader.
+// Each *Handler() returns one MetaAction; presumably each one deserialises
+// the action type named in its identifier from mrStream -- the definitions
+// are not part of this header.
+class VCL_DLLPUBLIC SvmReader
+{
+private:
+    SvStream& mrStream;
+
+    void ReadColor(::Color& rColor);
+
+public:
+    // NOTE(review): single-argument constructor is not 'explicit'.
+    SvmReader(SvStream& rIStm);
+
+    // pData is optional (defaults to nullptr); returns an SvStream reference
+    // (presumably mrStream -- confirm in the implementation).
+    SvStream& Read(GDIMetaFile& rMetaFile, ImplMetaReadData* pData = nullptr);
+    rtl::Reference<MetaAction> MetaActionHandler(ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> LineColorHandler();
+    rtl::Reference<MetaAction> FillColorHandler();
+    rtl::Reference<MetaAction> RectHandler();
+    rtl::Reference<MetaAction> PointHandler();
+    rtl::Reference<MetaAction> PixelHandler();
+    rtl::Reference<MetaAction> LineHandler();
+    rtl::Reference<MetaAction> RoundRectHandler();
+    rtl::Reference<MetaAction> EllipseHandler();
+    rtl::Reference<MetaAction> ArcHandler();
+    rtl::Reference<MetaAction> PieHandler();
+    rtl::Reference<MetaAction> ChordHandler();
+    rtl::Reference<MetaAction> PolyLineHandler();
+    rtl::Reference<MetaAction> PolygonHandler();
+    rtl::Reference<MetaAction> PolyPolygonHandler();
+    rtl::Reference<MetaAction> TextHandler(const ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> TextArrayHandler(const ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> StretchTextHandler(const ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> TextRectHandler(const ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> TextLineHandler();
+    rtl::Reference<MetaAction> BmpHandler();
+    rtl::Reference<MetaAction> BmpScaleHandler();
+    rtl::Reference<MetaAction> BmpScalePartHandler();
+    rtl::Reference<MetaAction> BmpExHandler();
+    rtl::Reference<MetaAction> BmpExScaleHandler();
+    rtl::Reference<MetaAction> BmpExScalePartHandler();
+    rtl::Reference<MetaAction> MaskHandler();
+    rtl::Reference<MetaAction> MaskScaleHandler();
+    rtl::Reference<MetaAction> MaskScalePartHandler();
+    rtl::Reference<MetaAction> GradientHandler();
+    rtl::Reference<MetaAction> GradientExHandler();
+    rtl::Reference<MetaAction> HatchHandler();
+    rtl::Reference<MetaAction> WallpaperHandler();
+    rtl::Reference<MetaAction> ClipRegionHandler();
+    rtl::Reference<MetaAction> ISectRectClipRegionHandler();
+    rtl::Reference<MetaAction> ISectRegionClipRegionHandler();
+    rtl::Reference<MetaAction> MoveClipRegionHandler();
+    rtl::Reference<MetaAction> TextColorHandler();
+    rtl::Reference<MetaAction> TextFillColorHandler();
+    rtl::Reference<MetaAction> TextLineColorHandler();
+    rtl::Reference<MetaAction> OverlineColorHandler();
+    rtl::Reference<MetaAction> TextAlignHandler();
+    rtl::Reference<MetaAction> MapModeHandler();
+    // Takes a non-const ImplMetaReadData*, unlike the text handlers above.
+    rtl::Reference<MetaAction> FontHandler(ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> PushHandler();
+    rtl::Reference<MetaAction> PopHandler();
+    rtl::Reference<MetaAction> RasterOpHandler();
+    rtl::Reference<MetaAction> TransparentHandler();
+    rtl::Reference<MetaAction> FloatTransparentHandler(ImplMetaReadData* pData);
+    rtl::Reference<MetaAction> EPSHandler();
+    rtl::Reference<MetaAction> RefPointHandler();
+    rtl::Reference<MetaAction> CommentHandler();
+    rtl::Reference<MetaAction> LayoutModeHandler();
+    rtl::Reference<MetaAction> TextLanguageHandler();
+    // Static: the only handler that does not need a reader instance.
+    static rtl::Reference<MetaAction> DefaultHandler();
+};
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/vcl/filter/SvmWriter.hxx b/include/vcl/filter/SvmWriter.hxx
new file mode 100644
index 000000000..ef8cf8c52
--- /dev/null
+++ b/include/vcl/filter/SvmWriter.hxx
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#pragma once
+
+#include <vcl/gdimtf.hxx>
+#include <vcl/metaact.hxx>
+
+class SvStream;
+
+// Writes a GDIMetaFile to an SvStream (see Write()).
+// The stream is held by reference, so it has to outlive the writer.
+// Each *Handler() takes the concrete action type named in its identifier;
+// presumably each one serialises that action to mrStream -- the definitions
+// are not part of this header.
+class VCL_DLLPUBLIC SvmWriter
+{
+private:
+    SvStream& mrStream;
+
+    void WriteColor(::Color aColor);
+
+public:
+    // NOTE(review): single-argument constructor is not 'explicit'; the
+    // parameter is named rIStm although this class writes to the stream.
+    SvmWriter(SvStream& rIStm);
+
+    SvStream& Write(const GDIMetaFile& rMetaFile);
+    // Static: computes a checksum for rMetaFile without a writer instance.
+    static BitmapChecksum GetChecksum(const GDIMetaFile& rMetaFile);
+    void MetaActionHandler(MetaAction* pAction, ImplMetaWriteData* pData);
+    void ActionHandler(const MetaAction* pAction);
+    void PixelHandler(const MetaPixelAction* pAction);
+    void PointHandler(const MetaPointAction* pAction);
+    void LineHandler(const MetaLineAction* pAction);
+    void RectHandler(const MetaRectAction* pAction);
+    void RoundRectHandler(const MetaRoundRectAction* pAction);
+    void EllipseHandler(const MetaEllipseAction* pAction);
+    void ArcHandler(const MetaArcAction* pAction);
+    void PieHandler(const MetaPieAction* pAction);
+    void ChordHandler(const MetaChordAction* pAction);
+    void PolyLineHandler(const MetaPolyLineAction* pAction);
+    void PolygonHandler(const MetaPolygonAction* pAction);
+    void PolyPolygonHandler(const MetaPolyPolygonAction* pAction);
+    void TextHandler(const MetaTextAction* pAction, const ImplMetaWriteData* pData);
+    void TextArrayHandler(const MetaTextArrayAction* pAction, const ImplMetaWriteData* pData);
+    void StretchTextHandler(const MetaStretchTextAction* pAction, const ImplMetaWriteData* pData);
+    void TextRectHandler(const MetaTextRectAction* pAction, const ImplMetaWriteData* pData);
+    void TextLineHandler(const MetaTextLineAction* pAction);
+    void BmpHandler(const MetaBmpAction* pAction);
+    void BmpScaleHandler(const MetaBmpScaleAction* pAction);
+    void BmpScalePartHandler(const MetaBmpScalePartAction* pAction);
+    void BmpExHandler(const MetaBmpExAction* pAction);
+    void BmpExScaleHandler(const MetaBmpExScaleAction* pAction);
+    void BmpExScalePartHandler(const MetaBmpExScalePartAction* pAction);
+    void MaskHandler(const MetaMaskAction* pAction);
+    void MaskScaleHandler(const MetaMaskScaleAction* pAction);
+    void MaskScalePartHandler(const MetaMaskScalePartAction* pAction);
+    void GradientHandler(const MetaGradientAction* pAction);
+    void GradientExHandler(const MetaGradientExAction* pAction);
+    void HatchHandler(const MetaHatchAction* pAction);
+    void WallpaperHandler(const MetaWallpaperAction* pAction);
+    void ClipRegionHandler(const MetaClipRegionAction* pAction);
+    void ISectRectClipRegionHandler(const MetaISectRectClipRegionAction* pAction);
+    void ISectRegionClipRegionHandler(const MetaISectRegionClipRegionAction* pAction);
+    void MoveClipRegionHandler(const MetaMoveClipRegionAction* pAction);
+    void LineColorHandler(const MetaLineColorAction* pAction);
+    void FillColorHandler(const MetaFillColorAction* pAction);
+    void TextColorHandler(const MetaTextColorAction* pAction);
+    void TextFillColorHandler(const MetaTextFillColorAction* pAction);
+    void TextLineColorHandler(const MetaTextLineColorAction* pAction);
+    void OverlineColorHandler(const MetaOverlineColorAction* pAction);
+    void TextAlignHandler(const MetaTextAlignAction* pAction);
+    void MapModeHandler(const MetaMapModeAction* pAction);
+    // Takes a non-const ImplMetaWriteData*, unlike the text handlers above.
+    void FontHandler(const MetaFontAction* pAction, ImplMetaWriteData* pData);
+    void PushHandler(const MetaPushAction* pAction);
+    void PopHandler(const MetaPopAction* pAction);
+    void RasterOpHandler(const MetaRasterOpAction* pAction);
+    void TransparentHandler(const MetaTransparentAction* pAction);
+    void FloatTransparentHandler(const MetaFloatTransparentAction* pAction);
+    void EPSHandler(const MetaEPSAction* pAction);
+    void RefPointHandler(const MetaRefPointAction* pAction);
+    void CommentHandler(const MetaCommentAction* pAction);
+    void LayoutModeHandler(const MetaLayoutModeAction* pAction);
+    void TextLanguageHandler(const MetaTextLanguageAction* pAction);
+};
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
new file mode 100644
index 000000000..fbe0be89c
--- /dev/null
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -0,0 +1,645 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
+#define INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
+
+#include <memory>
+#include <map>
+#include <vector>
+
+#include <tools/stream.hxx>
+#include <vcl/dllapi.h>
+#include <rtl/strbuf.hxx>
+
+#include <vcl/filter/pdfobjectcontainer.hxx>
+
+namespace com::sun::star::security
+{
+class XCertificate;
+}
+
+namespace com::sun::star::uno
+{
+template <class interface_type> class Reference;
+}
+
+namespace tools
+{
+class Rectangle;
+}
+
+namespace vcl::filter
+{
+// Forward declarations of element types defined later in this header.
+class PDFTrailerElement;
+class PDFReferenceElement;
+class PDFDocument;
+class PDFDictionaryElement;
+class PDFArrayElement;
+class PDFStreamElement;
+class PDFNumberElement;
+
+/// A byte range in a PDF file.
+class VCL_DLLPUBLIC PDFElement
+{
+    /// Flag behind setVisiting() / alreadyVisiting(). Presumably a guard
+    /// against re-entering an element while following references -- confirm
+    /// in the implementation.
+    bool m_bVisiting = false;
+    /// Flag behind setParsing() / alreadyParsing(). Presumably a guard
+    /// against recursive parsing -- confirm in the implementation.
+    bool m_bParsing = false;
+
+public:
+    PDFElement() = default;
+    /// Reads this element from rStream; implemented by every subclass.
+    /// The bool result presumably signals success -- confirm.
+    virtual bool Read(SvStream& rStream) = 0;
+    virtual ~PDFElement() = default;
+    void setVisiting(bool bVisiting) { m_bVisiting = bVisiting; }
+    bool alreadyVisiting() const { return m_bVisiting; }
+    void setParsing(bool bParsing) { m_bParsing = bParsing; }
+    bool alreadyParsing() const { return m_bParsing; }
+
+    /// Appends the textual form of this element to rBuffer; implemented by
+    /// every subclass.
+    virtual void writeString(OStringBuffer& rBuffer) = 0;
+};
+
+/// Indirect object: something with a unique ID.
+class VCL_DLLPUBLIC PDFObjectElement final : public PDFElement
+{
+    /// The document owning this element.
+    PDFDocument& m_rDoc;
+    /// Presumably the object number given to the constructor; stored as a
+    /// double (see GetObjectValue()).
+    double m_fObjectValue;
+    /// Presumably the generation number given to the constructor.
+    double m_fGenerationValue;
+    /// If set, the object contains this number element (outside any dictionary/array).
+    PDFNumberElement* m_pNumberElement;
+    /// Position after the '<<' token.
+    sal_uInt64 m_nDictionaryOffset;
+    /// Length of the dictionary buffer till (before) the '>>' token.
+    sal_uInt64 m_nDictionaryLength;
+    /// The dictionary of this object, see GetDictionary() / SetDictionary().
+    PDFDictionaryElement* m_pDictionaryElement;
+    /// Position after the '[' token, if m_pArrayElement is set.
+    sal_uInt64 m_nArrayOffset;
+    /// Length of the array buffer till (before) the ']' token.
+    sal_uInt64 m_nArrayLength;
+    /// The contained direct array, if any.
+    PDFArrayElement* m_pArrayElement;
+    /// The stream of this object, used when this is an object stream.
+    PDFStreamElement* m_pStreamElement;
+    /// Objects of an object stream.
+    std::vector<std::unique_ptr<PDFObjectElement>> m_aStoredElements;
+    /// Elements of an object in an object stream.
+    std::vector<std::unique_ptr<PDFElement>> m_aElements;
+    /// Uncompressed buffer of an object in an object stream.
+    std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
+    /// List of all reference elements inside this object's dictionary and
+    /// nested dictionaries.
+    std::vector<PDFReferenceElement*> m_aDictionaryReferences;
+
+    /// None of the members above has an in-class initializer, so all of them
+    /// rely on the constructor (defined outside this header).
+    bool m_bParsed;
+
+    /// Presumably performs the deferred parse tracked by m_bParsed -- confirm.
+    void parseIfNecessary();
+
+public:
+    PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
+    bool Read(SvStream& rStream) override;
+    /// Looks up rDictionaryKey and returns the element, whatever its type.
+    /// Non-const, as are the dictionary/array getters below; presumably
+    /// because they may trigger parseIfNecessary() -- confirm.
+    PDFElement* Lookup(const OString& rDictionaryKey);
+    /// Like Lookup(), but returns an indirect object.
+    PDFObjectElement* LookupObject(const OString& rDictionaryKey);
+    double GetObjectValue() const;
+    void SetDictionaryOffset(sal_uInt64 nDictionaryOffset);
+    sal_uInt64 GetDictionaryOffset();
+    void SetDictionaryLength(sal_uInt64 nDictionaryLength);
+    sal_uInt64 GetDictionaryLength();
+    PDFDictionaryElement* GetDictionary();
+    void SetDictionary(PDFDictionaryElement* pDictionaryElement);
+    void SetNumberElement(PDFNumberElement* pNumberElement);
+    PDFNumberElement* GetNumberElement() const;
+    /// Get access to the parsed key-value items from the object dictionary.
+    const std::map<OString, PDFElement*>& GetDictionaryItems();
+    const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
+    void AddDictionaryReference(PDFReferenceElement* pReference);
+    void SetArray(PDFArrayElement* pArrayElement);
+    void SetStream(PDFStreamElement* pStreamElement);
+    /// Access to the stream of the object, if it has any.
+    PDFStreamElement* GetStream() const;
+    void SetArrayOffset(sal_uInt64 nArrayOffset);
+    sal_uInt64 GetArrayOffset() const;
+    void SetArrayLength(sal_uInt64 nArrayLength);
+    sal_uInt64 GetArrayLength() const;
+    PDFArrayElement* GetArray();
+    /// Parse objects stored in this object stream.
+    void ParseStoredObjects();
+    /// NOTE(review): the return type matches m_aElements, not
+    /// m_aStoredElements, despite the name -- confirm which member is
+    /// returned.
+    std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
+    SvMemoryStream* GetStreamBuffer() const;
+    /// NOTE(review): takes the unique_ptr by non-const reference; whether
+    /// ownership is moved out of the argument is decided in the
+    /// implementation -- confirm.
+    void SetStreamBuffer(std::unique_ptr<SvMemoryStream>& pStreamBuffer);
+    PDFDocument& GetDocument();
+
+    /// Not implemented for indirect objects: the body is only an assert, so
+    /// the call does nothing when assertions are compiled out.
+    void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
+};
+
+/// Array object: a list.
+class VCL_DLLPUBLIC PDFArrayElement final : public PDFElement
+{
+    /// The items of the array, as raw pointers.
+    std::vector<PDFElement*> m_aElements;
+    /// The object that contains this array.
+    PDFObjectElement* m_pObject;
+
+public:
+    /// NOTE(review): single-argument constructor is not 'explicit'.
+    PDFArrayElement(PDFObjectElement* pObject);
+    bool Read(SvStream& rStream) override;
+    void PushBack(PDFElement* pElement);
+    const std::vector<PDFElement*>& GetElements() const;
+    /// Unchecked access: nIndex must be smaller than the number of elements.
+    PDFElement* GetElement(size_t nIndex) const { return m_aElements[nIndex]; }
+
+    /// Writes "[ ", then every element followed by one space, then "]".
+    /// Each stored pointer is dereferenced without a null check.
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append("[ ");
+        for (auto& rElement : m_aElements)
+        {
+            rElement->writeString(rBuffer);
+            rBuffer.append(" ");
+        }
+        rBuffer.append("]");
+    }
+};
+
+/// Reference object: something with a unique ID.
+class VCL_DLLPUBLIC PDFReferenceElement final : public PDFElement
+{
+    PDFDocument& m_rDoc;
+    /// Object number; an int despite the 'f' prefix.
+    int m_fObjectValue;
+    /// Generation number; an int despite the 'f' prefix.
+    int m_fGenerationValue;
+    /// Location after the 'R' token.
+    sal_uInt64 m_nOffset = 0;
+    /// The element providing the object number.
+    PDFNumberElement& m_rObject;
+
+public:
+    PDFReferenceElement(PDFDocument& rDoc, PDFNumberElement& rObject,
+                        PDFNumberElement const& rGeneration);
+    bool Read(SvStream& rStream) override;
+    /// Assuming the reference points to a number object, return its value.
+    double LookupNumber(SvStream& rStream) const;
+    /// Lookup referenced object, without assuming anything about its contents.
+    PDFObjectElement* LookupObject();
+    int GetObjectValue() const;
+    int GetGenerationValue() const;
+    sal_uInt64 GetOffset() const;
+    PDFNumberElement& GetObjectElement() const;
+
+    /// Writes "<object number> <generation number> R".
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append(sal_Int32(GetObjectValue()));
+        rBuffer.append(' ');
+        rBuffer.append(sal_Int32(GetGenerationValue()));
+        rBuffer.append(" R");
+    }
+};
+
+/// Stream object: a byte array with a known length.
+class VCL_DLLPUBLIC PDFStreamElement final : public PDFElement
+{
+    size_t m_nLength;
+    sal_uInt64 m_nOffset;
+    /// The byte array itself.
+    SvMemoryStream m_aMemory;
+
+public:
+    explicit PDFStreamElement(size_t nLength);
+    bool Read(SvStream& rStream) override;
+    sal_uInt64 GetOffset() const;
+    SvMemoryStream& GetMemory();
+
+    /// Writes "stream\n", the GetSize() bytes held by m_aMemory, then
+    /// "\nendstream\n".
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append("stream\n");
+        rBuffer.append(static_cast<const char*>(m_aMemory.GetData()), m_aMemory.GetSize());
+        rBuffer.append("\nendstream\n");
+    }
+};
+
+/// Name object: a key string.
+class VCL_DLLPUBLIC PDFNameElement final : public PDFElement
+{
+    /// The name; writeString() adds the leading '/' itself.
+    OString m_aValue;
+    /// Offset after the '/' token.
+    sal_uInt64 m_nLocation = 0;
+
+public:
+    PDFNameElement();
+    bool Read(SvStream& rStream) override;
+    void SetValue(const OString& rValue) { m_aValue = rValue; }
+    const OString& GetValue() const;
+    sal_uInt64 GetLocation() const;
+    /// Length of the stored value.
+    sal_uInt64 GetLength() const { return m_aValue.getLength(); }
+
+    /// Writes "/" followed by the value.
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append("/");
+        rBuffer.append(m_aValue);
+    }
+};
+
+/// Dictionary object: a set of key-value pairs.
+class VCL_DLLPUBLIC PDFDictionaryElement final : public PDFElement
+{
+    /// Key-value pairs when the dictionary is a nested value.
+    std::map<OString, PDFElement*> m_aItems;
+    /// Offset after the '<<' token.
+    sal_uInt64 m_nLocation = 0;
+    /// Position after the '/' token.
+    std::map<OString, sal_uInt64> m_aDictionaryKeyOffset;
+    /// Length of the dictionary key and value, till (before) the next token.
+    std::map<OString, sal_uInt64> m_aDictionaryKeyValueLength;
+
+public:
+    PDFDictionaryElement();
+    bool Read(SvStream& rStream) override;
+
+    /// Static helper working on any key -> element map, not only m_aItems.
+    static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
+                              const OString& rKey);
+    void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
+    sal_uInt64 GetKeyOffset(const OString& rKey) const;
+    void SetKeyValueLength(const OString& rKey, sal_uInt64 nLength);
+    sal_uInt64 GetKeyValueLength(const OString& rKey) const;
+    const std::map<OString, PDFElement*>& GetItems() const;
+    /// Looks up an object which is only referenced in this dictionary.
+    PDFObjectElement* LookupObject(const OString& rDictionaryKey);
+    /// Looks up an element which is contained in this dictionary.
+    PDFElement* LookupElement(const OString& rDictionaryKey);
+    sal_uInt64 GetLocation() const { return m_nLocation; }
+    /// Adds a key-value pair. Uses emplace(), so an already present rKey
+    /// keeps its old value and the new pointer is not stored.
+    void insert(OString const& rKey, PDFElement* pPDFElement)
+    {
+        m_aItems.emplace(rKey, pPDFElement);
+    }
+
+    /// Writes "<< ", then "/key value " for every item, then ">>".
+    /// Items come out in the key order of the std::map. Each value pointer
+    /// is dereferenced without a null check.
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append("<< ");
+        for (auto& rPair : m_aItems)
+        {
+            rBuffer.append("/");
+            rBuffer.append(rPair.first);
+            rBuffer.append(" ");
+            rPair.second->writeString(rBuffer);
+            rBuffer.append(" ");
+        }
+        rBuffer.append(">>");
+    }
+};
+
+/// How far the tokenizer should read.
+enum class TokenizeMode
+{
+    /// Full file.
+    END_OF_STREAM,
+    /// Till the first %%EOF token.
+    EOF_TOKEN,
+    /// Till the end of the current object.
+    END_OF_OBJECT,
+    /// Same as END_OF_OBJECT, but for object streams (no endobj keyword).
+    STORED_OBJECT
+};
+
+/// The type column of an entry in a cross-reference stream.
+enum class XRefEntryType
+{
+    /// xref "f" or xref stream "0".
+    FREE,
+    /// xref "n" or xref stream "1".
+    NOT_COMPRESSED,
+    /// xref stream "2".
+    COMPRESSED
+};
+
+/// An entry in a cross-reference stream.
+class XRefEntry
+{
+    XRefEntryType m_eType = XRefEntryType::NOT_COMPRESSED;
+    /**
+     * Non-compressed: The byte offset of the object, starting from the
+     * beginning of the file.
+     * Compressed: The object number of the object stream in which this object is
+     * stored.
+     */
+    sal_uInt64 m_nOffset = 0;
+    /// Is this entry changed as part of an incremental update?
+    bool m_bDirty = false;
+
+public:
+    XRefEntry();
+
+    void SetType(XRefEntryType eType) { m_eType = eType; }
+
+    XRefEntryType GetType() const { return m_eType; }
+
+    void SetOffset(sal_uInt64 nOffset) { m_nOffset = nOffset; }
+
+    sal_uInt64 GetOffset() const { return m_nOffset; }
+
+    void SetDirty(bool bDirty) { m_bDirty = bDirty; }
+
+    bool GetDirty() const { return m_bDirty; }
+};
+
+/// Hex string: in <AABB> form.
+class VCL_DLLPUBLIC PDFHexStringElement final : public PDFElement
+{
+    /// The text between '<' and '>', as writeString() emits it.
+    OString m_aValue;
+
+public:
+    bool Read(SvStream& rStream) override;
+    const OString& GetValue() const;
+
+    /// Writes "<", the value unchanged, then ">".
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append("<");
+        rBuffer.append(m_aValue);
+        rBuffer.append(">");
+    }
+};
+
+/// Literal string: in (asdf) form.
+class VCL_DLLPUBLIC PDFLiteralStringElement final : public PDFElement
+{
+    /// The text between '(' and ')', as writeString() emits it.
+    OString m_aValue;
+
+public:
+    bool Read(SvStream& rStream) override;
+    const OString& GetValue() const;
+
+    /// Writes "(", the value unchanged, then ")". No escaping is done here.
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append("(");
+        rBuffer.append(m_aValue);
+        rBuffer.append(")");
+    }
+};
+
+/// Numbering object: an integer or a real.
+class VCL_DLLPUBLIC PDFNumberElement final : public PDFElement
+{
+    /// Input file start location.
+    sal_uInt64 m_nOffset = 0;
+    /// Input file token length.
+    sal_uInt64 m_nLength = 0;
+    /// The value; integers are stored as double as well.
+    double m_fValue = 0;
+
+public:
+    PDFNumberElement();
+    bool Read(SvStream& rStream) override;
+    double GetValue() const;
+    void SetValue(double fValue) { m_fValue = fValue; }
+
+    sal_uInt64 GetLocation() const;
+    sal_uInt64 GetLength() const;
+
+    /// Appends the double value; the text format is whatever
+    /// OStringBuffer::append(double) produces.
+    void writeString(OStringBuffer& rBuffer) override { rBuffer.append(m_fValue); }
+};
+
+/// A one-liner comment.
+class VCL_DLLPUBLIC PDFCommentElement final : public PDFElement
+{
+    PDFDocument& m_rDoc;
+    /// Presumably the comment text filled in by Read() -- confirm.
+    OString m_aComment;
+
+public:
+    explicit PDFCommentElement(PDFDocument& rDoc);
+    bool Read(SvStream& rStream) override;
+    /// Writes nothing.
+    void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of a dictionary: '>>'.
+class VCL_DLLPUBLIC PDFEndDictionaryElement final : public PDFElement
+{
+    /// Offset before the '>>' token.
+    sal_uInt64 m_nLocation = 0;
+
+public:
+    PDFEndDictionaryElement();
+    bool Read(SvStream& rStream) override;
+    sal_uInt64 GetLocation() const;
+
+    /// Writes nothing; PDFDictionaryElement::writeString() emits ">>" itself.
+    void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of a stream: 'endstream' keyword.
+class VCL_DLLPUBLIC PDFEndStreamElement final : public PDFElement
+{
+public:
+    bool Read(SvStream& rStream) override;
+
+    /// Writes nothing; PDFStreamElement::writeString() emits "endstream"
+    /// itself.
+    void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of an object: 'endobj' keyword.
+class VCL_DLLPUBLIC PDFEndObjectElement final : public PDFElement
+{
+public:
+    bool Read(SvStream& rStream) override;
+
+    /// Writes nothing.
+    void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of an array: ']'.
+class VCL_DLLPUBLIC PDFEndArrayElement final : public PDFElement
+{
+    /// Location before the ']' token.
+    sal_uInt64 m_nOffset = 0;
+
+public:
+    PDFEndArrayElement();
+    bool Read(SvStream& rStream) override;
+    sal_uInt64 GetOffset() const;
+
+    /// Writes nothing; PDFArrayElement::writeString() emits "]" itself.
+    void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// Boolean object: a 'true' or a 'false'.
+class VCL_DLLPUBLIC PDFBooleanElement final : public PDFElement
+{
+    /// The boolean value; a bool despite the 'a' prefix.
+    bool m_aValue;
+
+public:
+    explicit PDFBooleanElement(bool bValue)
+        : m_aValue(bValue)
+    {
+    }
+
+    bool Read(SvStream& rStream) override;
+
+    /// Writes "true" or "false".
+    void writeString(OStringBuffer& rBuffer) override
+    {
+        rBuffer.append(m_aValue ? "true" : "false");
+    }
+};
+
+/// Null object: the 'null' singleton.
+class VCL_DLLPUBLIC PDFNullElement final : public PDFElement
+{
+public:
+    bool Read(SvStream& rStream) override;
+
+    void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
+};
+
+/**
+ * In-memory representation of an on-disk PDF document.
+ *
+ * The PDF element list is not meant to be saved back to disk, but some
+ * elements remember their source offset / length, and based on that it's
+ * possible to modify the input file.
+ */
+class VCL_DLLPUBLIC PDFDocument final : public PDFObjectContainer
+{
+    /// This vector owns all elements.
+    std::vector<std::unique_ptr<PDFElement>> m_aElements;
+    /// Object ID -> cross-reference entry (type, offset, dirty flag) map.
+    std::map<size_t, XRefEntry> m_aXRef;
+    /// Object offset -> object pointer map (non-owning pointers).
+    std::map<size_t, PDFObjectElement*> m_aOffsetObjects;
+    /// Object ID -> object pointer map (non-owning pointers), see SetIDObject().
+    std::map<size_t, PDFObjectElement*> m_aIDObjects;
+    /// List of xref offsets we know.
+    std::vector<size_t> m_aStartXRefs;
+    /// Offsets of trailers, from latest to oldest.
+    std::vector<size_t> m_aTrailerOffsets;
+    /// Trailer offset -> trailer pointer map (non-owning pointers).
+    std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
+    /// List of EOF offsets we know.
+    std::vector<size_t> m_aEOFs;
+    PDFTrailerElement* m_pTrailer = nullptr; // May stay nullptr, see m_pXRefStream.
+    /// When m_pTrailer is nullptr, this can still have a dictionary.
+    PDFObjectElement* m_pXRefStream = nullptr;
+    /// All editing takes place in this buffer, if it happens.
+    SvMemoryStream m_aEditBuffer;
+
+    /// Signature line in PDF format, to be consumed by the next Sign() invocation.
+    std::vector<sal_Int8> m_aSignatureLine;
+
+    /// 0-based page number where m_aSignatureLine should be placed.
+    size_t m_nSignaturePage = 0;
+
+    /// Suggest a minimal, yet free signature ID to use for the next signature.
+    sal_uInt32 GetNextSignature();
+    /// Write the signature object as part of signing; the two reference parameters are outputs.
+    sal_Int32 WriteSignatureObject(const OUString& rDescription, bool bAdES,
+                                   sal_uInt64& rLastByteRangeOffset, sal_Int64& rContentOffset);
+    /// Write the appearance object as part of signing.
+    sal_Int32 WriteAppearanceObject(tools::Rectangle& rSignatureRectangle);
+    /// Write the annot object as part of signing.
+    sal_Int32 WriteAnnotObject(PDFObjectElement const& rFirstPage, sal_Int32 nSignatureId,
+                               sal_Int32 nAppearanceId,
+                               const tools::Rectangle& rSignatureRectangle);
+    /// Write the updated Page object as part of signing.
+    bool WritePageObject(PDFObjectElement& rFirstPage, sal_Int32 nAnnotId);
+    /// Write the updated Catalog object as part of signing.
+    bool WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& pRoot);
+    /// Write the updated cross-references as part of signing.
+    void WriteXRef(sal_uInt64 nXRefOffset, PDFReferenceElement const* pRoot);
+
+public:
+    PDFDocument();
+    virtual ~PDFDocument();
+    PDFDocument& operator=(const PDFDocument&) = delete; // Not copy-assignable.
+    PDFDocument(const PDFDocument&) = delete; // Not copy-constructible.
+    /// @name Low-level functions, to be used by PDFElement subclasses.
+    //@{
+    /// Decode a hex dump.
+    static std::vector<unsigned char> DecodeHexString(PDFHexStringElement const* pElement);
+    static OUString DecodeHexStringUTF16BE(PDFHexStringElement const& rElement);
+    static OString ReadKeyword(SvStream& rStream);
+    static size_t FindStartXRef(SvStream& rStream);
+    void ReadXRef(SvStream& rStream);
+    void ReadXRefStream(SvStream& rStream);
+    static void SkipWhitespace(SvStream& rStream);
+    /// Instead of all whitespace, just skip CR and NL characters.
+    static void SkipLineBreaks(SvStream& rStream);
+    size_t GetObjectOffset(size_t nIndex) const;
+    const std::vector<std::unique_ptr<PDFElement>>& GetElements() const;
+    std::vector<PDFObjectElement*> GetPages();
+    PDFObjectElement* GetCatalog();
+    /// Remember the end location of an EOF token.
+    void PushBackEOF(size_t nOffset);
+    /// Look up object based on object number, possibly by parsing object streams.
+    PDFObjectElement* LookupObject(size_t nObjectNumber);
+    /// Access to the input document, even after the input stream is gone.
+    SvMemoryStream& GetEditBuffer();
+    /// Tokenize elements from current offset.
+    bool Tokenize(SvStream& rStream, TokenizeMode eMode,
+                  std::vector<std::unique_ptr<PDFElement>>& rElements,
+                  PDFObjectElement* pObjectElement);
+    /// Register an object (owned directly or indirectly by m_aElements) as a provider for a given ID.
+    void SetIDObject(size_t nID, PDFObjectElement* pObject);
+    //@}
+
+    /// @name High-level functions, to be used by others.
+    //@{
+    /// Read elements from the start of the stream till its end.
+    bool Read(SvStream& rStream);
+    /// Calls Read() first; if that fails, tries to fix up the input and then retries.
+    bool ReadWithPossibleFixup(SvStream& rStream);
+    void SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine); // See m_aSignatureLine.
+    void SetSignaturePage(size_t nPage); // See m_nSignaturePage (0-based).
+    /// Sign the read document with xCertificate in the edit buffer.
+    bool Sign(const css::uno::Reference<css::security::XCertificate>& xCertificate,
+              const OUString& rDescription, bool bAdES);
+    /// Serializes the contents of the edit buffer.
+    bool Write(SvStream& rStream);
+    /// Get a list of signatures embedded into this document.
+    std::vector<PDFObjectElement*> GetSignatureWidgets();
+    /// Remove the nth signature from the read document in the edit buffer.
+    bool RemoveSignature(size_t nPosition);
+    //@}
+
+    /// See vcl::PDFObjectContainer::createObject().
+    sal_Int32 createObject() override;
+    /// See vcl::PDFObjectContainer::updateObject().
+    bool updateObject(sal_Int32 n) override;
+    /// See vcl::PDFObjectContainer::writeBuffer().
+    bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) override;
+    void checkAndEnableStreamEncryption(sal_Int32 /*nObject*/) override {} // Empty override.
+    void disableStreamEncryption() override {} // Empty override.
+};
+
+/// The trailer singleton is at the end of the doc; its writeString() is not implemented (asserts).
+class VCL_DLLPUBLIC PDFTrailerElement final : public PDFElement
+{
+    PDFDocument& m_rDoc;
+    PDFDictionaryElement* m_pDictionaryElement; // NOTE(review): no in-class initializer; confirm ctor sets it.
+    /// Location of the end of the trailer token.
+    sal_uInt64 m_nOffset = 0;
+
+public:
+    explicit PDFTrailerElement(PDFDocument& rDoc);
+    bool Read(SvStream& rStream) override;
+    PDFElement* Lookup(const OString& rDictionaryKey);
+    sal_uInt64 GetLocation() const;
+
+    void SetDictionary(PDFDictionaryElement* pDictionaryElement) // Stores the pointer, non-owning.
+    {
+        m_pDictionaryElement = pDictionaryElement;
+    }
+
+    PDFDictionaryElement* GetDictionary() { return m_pDictionaryElement; }
+
+    void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
+};
+
+class VCL_DLLPUBLIC PDFObjectParser final // Works on an element vector it only references.
+{
+    const std::vector<std::unique_ptr<PDFElement>>& mrElements; // Must outlive this parser.
+
+public:
+    PDFObjectParser(std::vector<std::unique_ptr<PDFElement>> const& rElements) // NOTE(review): not explicit.
+        : mrElements(rElements)
+    {
+    }
+
+    size_t parse(PDFElement* pParsingElement, size_t nStartIndex = 0, int nCurrentDepth = 0);
+};
+
+} // namespace vcl::filter
+
+#endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/vcl/filter/pdfobjectcontainer.hxx b/include/vcl/filter/pdfobjectcontainer.hxx
new file mode 100644
index 000000000..f6614f09e
--- /dev/null
+++ b/include/vcl/filter/pdfobjectcontainer.hxx
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <sal/types.h>
+
+namespace vcl
+{
+/// Pure-virtual interface for creating, updating and writing PDF objects in a container.
+class SAL_NO_VTABLE SAL_DLLPUBLIC_RTTI PDFObjectContainer
+{
+public:
+    /* adds an entry to m_aObjects and returns its index+1, sets the offset to ~0
+     * NOTE(review): m_aObjects is not declared here; presumably a member of an implementing class
+     */
+    virtual sal_Int32 createObject() = 0;
+    /* sets the offset of object n to the current position of the output file + 1
+     */
+    virtual bool updateObject(sal_Int32 n) = 0;
+
+    // Write nBytes bytes starting at pBuffer to the end of the output.
+    virtual bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) = 0;
+
+    virtual void checkAndEnableStreamEncryption(sal_Int32 nObject) = 0; // Semantics are up to the implementer.
+
+    virtual void disableStreamEncryption() = 0; // Semantics are up to the implementer.
+
+protected:
+    ~PDFObjectContainer() noexcept = default; // Protected, non-virtual: no deletion via this interface.
+};
+} // namespace vcl
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |