Adding upstream version 4:7.4.7.upstream/4%7.4.7 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
commit: ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree: 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /sdext/source/pdfimport/pdfparse
parent: Initial commit. (diff)
download: libreoffice-cb75148ebd0135178ff46f89a30139c44f8d2040.tar.xz
libreoffice-cb75148ebd0135178ff46f89a30139c44f8d2040.zip
2 files changed, 2162 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/pdfparse/pdfentries.cxx b/sdext/source/pdfimport/pdfparse/pdfentries.cxx
new file mode 100644
index 000000000..6fba80f70
--- /dev/null
+++ b/sdext/source/pdfimport/pdfparse/pdfentries.cxx
@@ -0,0 +1,1461 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <pdfparse.hxx>
+
+#include <comphelper/hash.hxx>
+
+#include <rtl/strbuf.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <rtl/digest.h>
+#include <rtl/cipher.h>
+#include <sal/log.hxx>
+
+#include <zlib.h>
+
+#include <math.h>
+#include <map>
+
+#include <string.h>
+
+
+namespace pdfparse
+{
+
+// Bookkeeping shared by all entries while a document is being emitted:
+// the cross reference table, the container holding the indirect objects
+// and the object whose strings/streams are currently being decrypted.
+struct EmitImplData
+{
+    // xref table: maps object number to a pair of (generation, buffer offset)
+    typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
+    XRefTable m_aXRefTable;
+    // container of all indirect objects (usually a PDFFile*)
+    const PDFContainer* m_pObjectContainer;
+    // object number / generation used as decryption parameters; 0/0 means "none"
+    unsigned int m_nDecryptObject;
+    unsigned int m_nDecryptGeneration;
+
+    explicit EmitImplData( const PDFContainer* pTopContainer ) :
+        m_pObjectContainer( pTopContainer ),
+        m_nDecryptObject( 0 ),
+        m_nDecryptGeneration( 0 )
+    {}
+
+    // Records the output offset of an object.
+    // returns true if the xref table was updated
+    bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
+    {
+        const std::pair<unsigned int, unsigned int> aEntry( nGeneration, nOffset );
+        const XRefTable::iterator aFound = m_aXRefTable.find( nObject );
+        if( aFound == m_aXRefTable.end() )
+        {
+            // object number seen for the first time
+            m_aXRefTable.emplace( nObject, aEntry );
+            return true;
+        }
+        // an existing entry is only replaced by a strictly higher generation
+        if( nGeneration <= aFound->second.first )
+            return false;
+        aFound->second = aEntry;
+        return true;
+    }
+
+    // Decrypts nLen bytes from pInBuffer into pOutBuffer via the owning PDFFile.
+    // Does nothing if the object container is not a PDFFile.
+    void decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
+                  unsigned int nObject, unsigned int nGeneration ) const
+    {
+        if( const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer) )
+            static_cast<void>( pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration ) );
+    }
+
+    // Remembers which object's contents are currently being emitted.
+    void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
+    {
+        m_nDecryptObject = nObject;
+        m_nDecryptGeneration = nGeneration;
+    }
+};
+
+}
+
+using namespace pdfparse;
+
+// Creates an emit context with deflating and decrypting switched off.
+// Implementation data is only allocated when a top level container is given.
+EmitContext::EmitContext( const PDFContainer* pTop ) :
+    m_bDeflate( false ),
+    m_bDecrypt( false )
+{
+    if( pTop == nullptr )
+        return;
+    m_pImplData.reset( new EmitImplData( pTop ) );
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+EmitContext::~EmitContext() = default;
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFEntry::~PDFEntry() = default;
+
+// Returns the implementation data attached to rContext (may be null).
+// The pointer is not owned by the caller.
+EmitImplData* PDFEntry::getEmitData( EmitContext const & rContext )
+{
+    EmitImplData* const pData = rContext.m_pImplData.get();
+    return pData;
+}
+
+// Replaces the implementation data attached to rContext with pNewEmitData;
+// the context takes ownership of the new pointer and frees the old one.
+// Passing the pointer the context already holds is a no-op.
+void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
+{
+    // Re-seating the smart pointer onto the object it already owns would
+    // delete that object and leave the context with a dangling pointer.
+    if( rContext.m_pImplData.get() == pNewEmitData )
+        return;
+    rContext.m_pImplData.reset( pNewEmitData );
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFValue::~PDFValue() = default;
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFComment::~PDFComment() = default;
+
+// Writes the stored comment text unchanged; returns the result of the write.
+bool PDFComment::emit( EmitContext& rWriteContext ) const
+{
+    const bool bWritten = rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
+    return bWritten;
+}
+
+// Returns a newly allocated comment with the same text.
+PDFEntry* PDFComment::clone() const
+{
+    PDFComment* pCopy = new PDFComment( m_aComment );
+    return pCopy;
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFName::~PDFName() = default;
+
+// Writes " /" followed by the raw (unfiltered) name.
+// Returns false as soon as one of the writes fails.
+bool PDFName::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( " /", 2 )
+        && rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
+}
+
+// Returns a newly allocated name entry carrying the same raw name.
+PDFEntry* PDFName::clone() const
+{
+    PDFName* pCopy = new PDFName( m_aName );
+    return pCopy;
+}
+
+// Returns the name with every "#XX" escape (two hex digits) replaced by the
+// byte it encodes; the resulting bytes are interpreted as UTF-8.
+// A '#' that is not followed by two more characters is copied verbatim.
+// A character that is not a hex digit contributes 0 to the decoded byte.
+OUString PDFName::getFilteredName() const
+{
+    // numeric value of a single hex digit, 0 for anything else
+    const auto hexValue = []( char c ) -> int
+    {
+        if( c >= '0' && c <= '9' )
+            return c - '0';
+        if( c >= 'a' && c <= 'f' )
+            return c - 'a' + 10;
+        if( c >= 'A' && c <= 'F' )
+            return c - 'A' + 10;
+        return 0;
+    };
+
+    const sal_Int32 nLen = m_aName.getLength();
+    const char* pStr = m_aName.getStr();
+    OStringBuffer aFilter( nLen );
+    for( sal_Int32 i = 0; i < nLen; i++ )
+    {
+        // Signed arithmetic: "i + 2 < nLen" cannot wrap around for short
+        // names and also accepts an escape that ends the name.
+        if( pStr[i] == '#' && i + 2 < nLen )
+        {
+            const int nByte = ( hexValue( pStr[i+1] ) << 4 ) | hexValue( pStr[i+2] );
+            aFilter.append( static_cast<char>( nByte ) );
+            i += 2; // skip the two digits just consumed
+        }
+        else
+            aFilter.append( pStr[i] );
+    }
+    return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFString::~PDFString() = default;
+
+// Writes the string preceded by a blank.
+// Without decryption (or without a current decrypt object) the original
+// token is written as is. Otherwise the unescaped string is decrypted and
+// written either as hex string (if it starts with the byte pair FF FE or
+// FE FF) or enclosed in parentheses.
+bool PDFString::emit( EmitContext& rWriteContext ) const
+{
+    if( ! rWriteContext.write( " ", 1 ) )
+        return false;
+    EmitImplData* pEData = getEmitData( rWriteContext );
+    const bool bDecryptHere = rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject;
+    if( ! bDecryptHere )
+        return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
+
+    OString aPlain( getFilteredString() );
+    // decrypt inplace (evil since OString is supposed to be const
+    // however in this case we know that getFilteredString returned a singular string instance
+    pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aPlain.getStr()), aPlain.getLength(),
+                     reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aPlain.getStr())),
+                     pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
+
+    const char* pData = aPlain.getStr();
+    const sal_Int32 nData = aPlain.getLength();
+    const bool bByteOrderMark =
+        nData > 1 &&
+        ( (static_cast<unsigned char>(pData[0]) == 0xff && static_cast<unsigned char>(pData[1]) == 0xfe) ||
+          (static_cast<unsigned char>(pData[0]) == 0xfe && static_cast<unsigned char>(pData[1]) == 0xff) );
+    if( ! bByteOrderMark )
+    {
+        // plain string in parentheses
+        return rWriteContext.write( "(", 1 )
+            && rWriteContext.write( pData, nData )
+            && rWriteContext.write( ")", 1 );
+    }
+
+    // hex string: two upper case hex digits per byte
+    static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+    if( ! rWriteContext.write( "<", 1 ) )
+        return false;
+    for( sal_Int32 i = 0; i < nData; i++ )
+    {
+        const sal_uInt32 nByte = sal_uInt32(pData[i]);
+        if( ! rWriteContext.write( pHexTab + ((nByte >> 4) & 0x0f), 1 ) ||
+            ! rWriteContext.write( pHexTab + (nByte & 0x0f), 1 ) )
+            return false;
+    }
+    return rWriteContext.write( ">", 1 );
+}
+
+// Returns a newly allocated string entry holding the same raw token.
+PDFEntry* PDFString::clone() const
+{
+    PDFString* pCopy = new PDFString( m_aString );
+    return pCopy;
+}
+
+// Returns the string contents with the PDF syntax removed:
+// - "(...)" literal strings: the enclosing parentheses are dropped and
+//   backslash escapes (\n \r \t \b \f \( \) \\, line continuations and up
+//   to three octal digits) are resolved;
+// - "<...>" hex strings: pairs of hex digits are converted to bytes, a
+//   missing last digit counts as 0.
+// Any other token yields an empty string.
+OString PDFString::getFilteredString() const
+{
+    // numeric value of a single hex digit, 0 for anything else
+    const auto hexValue = []( char c ) -> int
+    {
+        if( c >= '0' && c <= '9' )
+            return c - '0';
+        if( c >= 'a' && c <= 'f' )
+            return c - 'a' + 10;
+        if( c >= 'A' && c <= 'F' )
+            return c - 'A' + 10;
+        return 0;
+    };
+
+    const int nLen = m_aString.getLength();
+    OStringBuffer aBuf( nLen );
+
+    const char* const pStr = m_aString.getStr();
+    if( *pStr == '(' )
+    {
+        const char* pCur = pStr+1;
+        while( pCur - pStr < nLen-1 )
+        {
+            if( *pCur != '\\' )
+            {
+                // ordinary character, copy and advance
+                aBuf.append( *pCur++ );
+                continue;
+            }
+
+            // step onto the character following the backslash
+            pCur++;
+            if( pCur - pStr >= nLen )
+            {
+                pCur++;
+                continue;
+            }
+
+            char aEsc = 0;
+            switch( *pCur )
+            {
+                case 'n':  aEsc = '\n'; break;
+                case 'r':  aEsc = '\r'; break;
+                case 't':  aEsc = '\t'; break;
+                case 'b':  aEsc = '\b'; break;
+                case 'f':  aEsc = '\f'; break;
+                case '(':  aEsc = '(';  break;
+                case ')':  aEsc = ')';  break;
+                case '\\': aEsc = '\\'; break;
+                case '\n':
+                    // backslash + line feed: line continuation, emits nothing
+                    pCur++;
+                    continue;
+                case '\r':
+                    // backslash + CR (optionally followed by LF): emits nothing
+                    pCur++;
+                    if( *pCur == '\n' )
+                        pCur++;
+                    continue;
+                default:
+                    // up to three octal digits; with no digit at all aEsc stays 0
+                    for( int nDigits = 0; nDigits < 3 && *pCur >= '0' && *pCur <= '7'; ++nDigits )
+                        aEsc = 8*aEsc + (*pCur++ - '0');
+                    // move pointer back to last character of octal sequence
+                    pCur--;
+                    break;
+            }
+            aBuf.append( aEsc );
+            // move pointer to next character
+            pCur++;
+        }
+    }
+    else if( *pStr == '<' )
+    {
+        const char* pCur = pStr+1;
+        while( *pCur != '>' && pCur - pStr < nLen )
+        {
+            // high nibble
+            char rResult = char( hexValue( *pCur ) << 4 );
+            pCur++;
+            // low nibble, unless the string ends after an odd number of digits
+            if( *pCur != '>' && pCur - pStr < nLen )
+                rResult |= char( hexValue( *pCur ) );
+            pCur++;
+            aBuf.append( rResult );
+        }
+    }
+
+    return aBuf.makeStringAndClear();
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFNumber::~PDFNumber() = default;
+
+// Writes the number preceded by a blank, using at most five fractional
+// digits and no trailing zeros. A minus sign is only written if the
+// printed value is not zero.
+bool PDFNumber::emit( EmitContext& rWriteContext ) const
+{
+    const int nPrecision = 5;
+
+    // work on the magnitude, the sign is re-attached when printing
+    const bool bNeg = m_fValue < 0.0;
+    double fRest = bNeg ? -m_fValue : m_fValue;
+
+    // split into integer part and fractional rest
+    sal_Int64 nInt = static_cast<sal_Int64>(fRest);
+    fRest -= static_cast<double>(nInt);
+    // optimizing hardware may lead to a value of 1.0 after the subtraction
+    if( fRest == 1.0 || log10( 1.0-fRest ) <= -nPrecision )
+    {
+        nInt++;
+        fRest = 0.0;
+    }
+    // fractional part scaled to an integer with nPrecision digits
+    sal_Int64 nFrac = 0;
+    if( fRest != 0.0 )
+    {
+        fRest *= pow( 10.0, static_cast<double>(nPrecision) );
+        nFrac = static_cast<sal_Int64>(fRest);
+    }
+
+    OStringBuffer aBuf( 32 );
+    aBuf.append( ' ' );
+    if( bNeg && ( nInt || nFrac ) )
+        aBuf.append( '-' );
+    aBuf.append( nInt );
+    if( nFrac )
+    {
+        aBuf.append( '.' );
+        // emit digit by digit, most significant first, stopping once the
+        // remaining digits would all be zero
+        sal_Int64 nBound = static_cast<sal_Int64>(pow( 10.0, nPrecision - 1.0 )+0.5);
+        int nDigit = 0;
+        while( nDigit < nPrecision && nFrac )
+        {
+            const sal_Int64 nNumb = nFrac / nBound;
+            nFrac -= nNumb * nBound;
+            aBuf.append( nNumb );
+            nBound /= 10;
+            ++nDigit;
+        }
+    }
+
+    return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
+}
+
+// Returns a newly allocated number entry with the same value.
+PDFEntry* PDFNumber::clone() const
+{
+    PDFNumber* pCopy = new PDFNumber( m_fValue );
+    return pCopy;
+}
+
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFBool::~PDFBool() = default;
+
+// Writes " true" or " false" depending on the stored value.
+bool PDFBool::emit( EmitContext& rWriteContext ) const
+{
+    if( m_bValue )
+        return rWriteContext.write( " true", 5 );
+    return rWriteContext.write( " false", 6 );
+}
+
+// Returns a newly allocated boolean entry with the same value.
+PDFEntry* PDFBool::clone() const
+{
+    PDFBool* pCopy = new PDFBool( m_bValue );
+    return pCopy;
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFNull::~PDFNull() = default;
+
+// Writes the keyword " null"; returns the result of the write.
+bool PDFNull::emit( EmitContext& rWriteContext ) const
+{
+    const bool bWritten = rWriteContext.write( " null", 5 );
+    return bWritten;
+}
+
+// Returns a newly allocated null entry.
+PDFEntry* PDFNull::clone() const
+{
+    PDFNull* pCopy = new PDFNull();
+    return pCopy;
+}
+
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFObjectRef::~PDFObjectRef() = default;
+
+// Writes an indirect reference of the form " <number> <generation> R".
+bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
+{
+    OStringBuffer aBuf( 32 );
+    aBuf.append( ' ' );
+    aBuf.append( sal_Int32( m_nNumber ) );
+    aBuf.append( ' ' );
+    aBuf.append( sal_Int32( m_nGeneration ) );
+    aBuf.append( " R" );
+    return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
+}
+
+// Returns a newly allocated reference to the same object number/generation.
+PDFEntry* PDFObjectRef::clone() const
+{
+    PDFObjectRef* pCopy = new PDFObjectRef( m_nNumber, m_nGeneration );
+    return pCopy;
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFContainer::~PDFContainer() = default;
+
+// Emits all sub elements in order and stops at the first failure.
+// While decrypting, a name "Encrypt" and the element following it are
+// skipped.
+bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
+{
+    const size_t nCount = m_aSubElements.size();
+    for( size_t nPos = 0; nPos < nCount; ++nPos )
+    {
+        PDFEntry* pEntry = m_aSubElements[nPos].get();
+        if( rWriteContext.m_bDecrypt )
+        {
+            const PDFName* pName = dynamic_cast<PDFName*>(pEntry);
+            if (pName && pName->m_aName == "Encrypt")
+            {
+                // step over the value as well; the loop increment then
+                // moves on to the element after it
+                ++nPos;
+                continue;
+            }
+        }
+        if( ! pEntry->emit( rWriteContext ) )
+            return false;
+    }
+    return true;
+}
+
+// Appends a clone of every sub element, in order, to rNewSubElements.
+void PDFContainer::cloneSubElements( std::vector<std::unique_ptr<PDFEntry>>& rNewSubElements ) const
+{
+    for( const std::unique_ptr<PDFEntry>& rpEntry : m_aSubElements )
+        rNewSubElements.emplace_back( rpEntry->clone() );
+}
+
+// Returns the first direct sub element that is a PDFObject with the given
+// number and generation, or nullptr if there is none.
+PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
+{
+    for( const std::unique_ptr<PDFEntry>& rpEntry : m_aSubElements )
+    {
+        PDFObject* pCandidate = dynamic_cast<PDFObject*>(rpEntry.get());
+        if( ! pCandidate )
+            continue;
+        if( pCandidate->m_nNumber == nNumber && pCandidate->m_nGeneration == nGeneration )
+            return pCandidate;
+    }
+    return nullptr;
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFArray::~PDFArray() = default;
+
+// Writes "[", all sub elements and "]"; false as soon as a step fails.
+bool PDFArray::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( "[", 1 )
+        && emitSubElements( rWriteContext )
+        && rWriteContext.write( "]", 1 );
+}
+
+// Returns a newly allocated array containing clones of all sub elements.
+PDFEntry* PDFArray::clone() const
+{
+    // hold the copy in a smart pointer so it is freed again if cloning a
+    // sub element throws
+    std::unique_ptr<PDFArray> pNewAr( new PDFArray() );
+    cloneSubElements( pNewAr->m_aSubElements );
+    return pNewAr.release();
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFDict::~PDFDict() = default;
+
+// Writes the dictionary as "<<\n", its sub elements and "\n>>\n";
+// false as soon as a step fails.
+bool PDFDict::emit( EmitContext& rWriteContext ) const
+{
+    return rWriteContext.write( "<<\n", 3 )
+        && emitSubElements( rWriteContext )
+        && rWriteContext.write( "\n>>\n", 4 );
+}
+
+// Sets the value stored under rName, taking ownership of pValue.
+// - a null pValue removes the entry (same as eraseValue);
+// - an unknown name is appended as a new name/value pair;
+// - a known name gets its value element replaced in place.
+// The lookup map is kept in sync with the sub element vector.
+void PDFDict::insertValue( const OString& rName, std::unique_ptr<PDFEntry> pValue )
+{
+    if( ! pValue )
+    {
+        // Nothing to insert: only remove the old entry. Continuing would
+        // append the name together with a null element.
+        eraseValue( rName );
+        return;
+    }
+
+    PDFEntry* pValueTmp = nullptr;
+    const auto it = m_aMap.find( rName );
+    if( it == m_aMap.end() )
+    {
+        // new name/value, pair, append it
+        m_aSubElements.emplace_back(std::make_unique<PDFName>(rName));
+        m_aSubElements.emplace_back( std::move(pValue) );
+        pValueTmp = m_aSubElements.back().get();
+    }
+    else
+    {
+        // replace the element that currently holds the value
+        for( std::unique_ptr<PDFEntry>& rpEntry : m_aSubElements )
+        {
+            if( rpEntry.get() == it->second )
+            {
+                rpEntry = std::move(pValue);
+                pValueTmp = rpEntry.get();
+                break;
+            }
+        }
+    }
+    // the map is expected to only reference elements of m_aSubElements
+    assert(pValueTmp);
+    m_aMap[ rName ] = pValueTmp;
+}
+
+// Removes the first name element equal to rName together with its value,
+// i.e. the next element after the name that is not a comment, and rebuilds
+// the lookup map. Does nothing if no such name/value pair exists.
+void PDFDict::eraseValue( std::string_view rName )
+{
+    const unsigned int nCount = m_aSubElements.size();
+    for( unsigned int nName = 0; nName < nCount; nName++ )
+    {
+        const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[nName].get());
+        if( ! ( pName && pName->m_aName == rName ) )
+            continue;
+
+        // look for the value belonging to this name
+        for( unsigned int nValue = nName+1; nValue < nCount; nValue++ )
+        {
+            if( dynamic_cast<PDFComment*>(m_aSubElements[nValue].get()) != nullptr )
+                continue;
+            // remove and free subelements from vector; the higher index
+            // goes first so that the lower one stays valid
+            m_aSubElements.erase( m_aSubElements.begin()+nValue );
+            m_aSubElements.erase( m_aSubElements.begin()+nName );
+            buildMap();
+            return;
+        }
+    }
+}
+
+// Rebuilds the name -> value lookup map from the sub elements, ignoring
+// comments. Elements are expected to alternate between name and value.
+// Returns nullptr on success. Otherwise returns the offending element:
+// the first element found in name position that is not a PDFName, or the
+// trailing name that has no value.
+PDFEntry* PDFDict::buildMap()
+{
+    m_aMap.clear();
+
+    // name waiting for its value, nullptr while a name is expected
+    PDFName* pPendingName = nullptr;
+    for( const std::unique_ptr<PDFEntry>& rpEntry : m_aSubElements )
+    {
+        PDFEntry* pEntry = rpEntry.get();
+        if( dynamic_cast<PDFComment*>(pEntry) != nullptr )
+            continue;
+
+        if( pPendingName )
+        {
+            m_aMap[ pPendingName->m_aName ] = pEntry;
+            pPendingName = nullptr;
+            continue;
+        }
+
+        pPendingName = dynamic_cast<PDFName*>(pEntry);
+        if( pPendingName == nullptr )
+            return pEntry;
+    }
+    return pPendingName;
+}
+
+// Returns a newly allocated dictionary containing clones of all sub
+// elements, with its lookup map rebuilt for the cloned elements.
+PDFEntry* PDFDict::clone() const
+{
+    // hold the copy in a smart pointer so it is freed again if cloning a
+    // sub element throws
+    std::unique_ptr<PDFDict> pNewDict( new PDFDict() );
+    cloneSubElements( pNewDict->m_aSubElements );
+    pNewDict->buildMap();
+    return pNewDict.release();
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFStream::~PDFStream() = default;
+
+// Copies the bytes between begin and end offset unchanged from the
+// original input.
+bool PDFStream::emit( EmitContext& rWriteContext ) const
+{
+    const unsigned int nLength = m_nEndOffset-m_nBeginOffset;
+    return rWriteContext.copyOrigBytes( m_nBeginOffset, nLength );
+}
+
+// Returns a newly allocated stream entry covering the same offsets.
+// The dictionary pointer is passed as nullptr, not copied from this stream.
+PDFEntry* PDFStream::clone() const
+{
+    PDFStream* pCopy = new PDFStream( m_nBeginOffset, m_nEndOffset, nullptr );
+    return pCopy;
+}
+
+// Returns the value of the /Length entry of the stream dictionary.
+// The entry may be a number, or a reference to an object in pContainer
+// whose content is a number. Returns 0 if there is no dictionary, no
+// /Length entry, or the entry cannot be resolved to a number.
+unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
+{
+    if( ! m_pDict )
+        return 0;
+    // find /Length entry, can either be a direct or indirect number object
+    const auto aLength = m_pDict->m_aMap.find( "Length" );
+    if( aLength == m_pDict->m_aMap.end() )
+        return 0;
+
+    PDFNumber* pNum = dynamic_cast<PDFNumber*>(aLength->second);
+    if( ! pNum && pContainer )
+    {
+        if( const PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(aLength->second) )
+        {
+            // indirect length: use the first object with matching number/generation
+            const PDFObject* pObj = pContainer->findObject( pRef->m_nNumber, pRef->m_nGeneration );
+            if( pObj && pObj->m_pObject )
+                pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
+        }
+    }
+    if( ! pNum )
+        return 0;
+    return static_cast<unsigned int>(pNum->m_fValue);
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFObject::~PDFObject() = default;
+
+// Reads the raw stream data of this object into rpStream.
+//
+// rpStream          receives a buffer holding the stream data, with the
+//                   "stream" keyword and the following line end removed;
+//                   reset if the original bytes could not be read
+// pBytes            receives the number of valid bytes in rpStream: the
+//                   /Length of the stream dictionary, limited to the bytes
+//                   actually available; 0 if there is no usable stream
+// pObjectContainer  used to resolve an indirect /Length
+// rContext          source of the original bytes; if m_bDecrypt is set and
+//                   implementation data is present, the data is decrypted
+//                   in place
+//
+// Returns true if the (first) filter of the stream is FlateDecode.
+bool PDFObject::getDeflatedStream( std::unique_ptr<char[]>& rpStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
+{
+    bool bIsDeflated = false;
+    if( m_pStream && m_pStream->m_pDict &&
+        m_pStream->m_nEndOffset > m_pStream->m_nBeginOffset+15
+        )
+    {
+        unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
+        rpStream.reset(new char[ nOuterStreamLen ]);
+        unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, rpStream.get() );
+        if( nRead != nOuterStreamLen )
+        {
+            rpStream.reset();
+            *pBytes = 0;
+            return false;
+        }
+        // is there a filter entry ?
+        const auto it = m_pStream->m_pDict->m_aMap.find( "Filter" );
+        if( it != m_pStream->m_pDict->m_aMap.end() )
+        {
+            PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
+            if( ! pFilter )
+            {
+                PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
+                if( pArray && ! pArray->m_aSubElements.empty() )
+                {
+                    pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
+                }
+            }
+
+            // is the (first) filter FlateDecode ?
+            if (pFilter && pFilter->m_aName == "FlateDecode")
+            {
+                bIsDeflated = true;
+            }
+        }
+        // prepare compressed data section
+        const char* const pEnd = rpStream.get() + nOuterStreamLen;
+        char* pStream = rpStream.get();
+        if( pStream[0] == 's' )
+            pStream += 6; // skip "stream" (the buffer holds more than 15 bytes)
+        // skip line end after "stream", but never run past the buffer
+        while( pStream < pEnd && ( *pStream == '\r' || *pStream == '\n' ) )
+            pStream++;
+        // get the compressed length; the dictionary value comes from the
+        // file and must not exceed what was actually read
+        const unsigned int nAvailable = static_cast<unsigned int>( pEnd - pStream );
+        *pBytes = m_pStream->getDictLength( pObjectContainer );
+        if( *pBytes > nAvailable )
+            *pBytes = nAvailable;
+        if( pStream != rpStream.get() )
+            memmove( rpStream.get(), pStream, *pBytes );
+        if( rContext.m_bDecrypt )
+        {
+            // the context may carry no implementation data at all
+            if( EmitImplData* pEData = getEmitData( rContext ) )
+            {
+                pEData->decrypt( reinterpret_cast<const sal_uInt8*>(rpStream.get()),
+                                 *pBytes,
+                                 reinterpret_cast<sal_uInt8*>(rpStream.get()),
+                                 m_nNumber,
+                                 m_nGeneration
+                                 ); // decrypt inplace
+            }
+        }
+    }
+    else
+    {
+        *pBytes = 0;
+    }
+    return bIsDeflated;
+}
+
+// Inflates nLen bytes of zlib data starting at pBegin.
+//
+// *pOutBuf  in: a buffer obtained from malloc/realloc, or nullptr;
+//           out: buffer holding the inflated data, to be released by the
+//           caller with std::free; nullptr on error
+// *pOutLen  out: number of inflated bytes in *pOutBuf; 0 on error
+//
+// If the input ends before the end of the zlib stream, the data decoded
+// so far is returned.
+static void unzipToBuffer( char* pBegin, unsigned int nLen,
+                           sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
+{
+    z_stream aZStr{};
+    aZStr.next_in       = reinterpret_cast<Bytef *>(pBegin);
+    aZStr.avail_in      = nLen;
+    aZStr.total_out = aZStr.total_in = 0;
+    aZStr.zalloc        = nullptr;
+    aZStr.zfree         = nullptr;
+    aZStr.opaque        = nullptr;
+
+    int err = inflateInit(&aZStr);
+
+    const unsigned int buf_increment_size = 16384;
+
+    if (auto p = static_cast<sal_uInt8*>(std::realloc(*pOutBuf, buf_increment_size)))
+    {
+        *pOutBuf = p;
+        aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
+        aZStr.avail_out = buf_increment_size;
+        *pOutLen = buf_increment_size;
+    }
+    else
+        err = Z_MEM_ERROR;
+    while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
+    {
+        err = inflate( &aZStr, Z_NO_FLUSH );
+        if( err == Z_NEED_DICT )
+        {
+            // No preset dictionary is ever supplied, so inflate() would
+            // keep returning this positive code and the loop would never
+            // end. Treat it as corrupt input.
+            err = Z_DATA_ERROR;
+            break;
+        }
+        if( aZStr.avail_out == 0 && err == Z_OK )
+        {
+            // output buffer full, enlarge it by one increment
+            const sal_uInt32 nNewAlloc = *pOutLen + buf_increment_size;
+            auto p = nNewAlloc > *pOutLen
+                   ? static_cast<sal_uInt8*>(std::realloc(*pOutBuf, nNewAlloc))
+                   : nullptr; // size computation wrapped around
+            if( p )
+            {
+                *pOutBuf = p;
+                aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
+                aZStr.avail_out = buf_increment_size;
+                *pOutLen = nNewAlloc;
+            }
+            else
+                err = Z_MEM_ERROR;
+        }
+    }
+    if( err >= Z_OK )
+    {
+        // Report only the bytes actually produced. This also covers input
+        // that ended before Z_STREAM_END, where the unused tail of the
+        // buffer is uninitialized.
+        *pOutLen -= aZStr.avail_out;
+    }
+    inflateEnd(&aZStr);
+    if( err < Z_OK )
+    {
+        std::free( *pOutBuf );
+        *pOutBuf = nullptr;
+        *pOutLen = 0;
+    }
+}
+
+// Writes the stream data of this object (without the surrounding
+// "stream"/"endstream" keywords) to rWriteContext. The data is inflated
+// first if the stream is FlateDecode encoded and the context asks for
+// deflating. Does nothing for objects without a stream. The result of the
+// write is not reported.
+void PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
+{
+    if( !m_pStream )
+        return;
+
+    std::unique_ptr<char[]> pRaw;
+    unsigned int nRaw = 0;
+    const bool bInflate =
+        getDeflatedStream( pRaw, &nRaw, pParsedFile, rWriteContext ) && nRaw && rWriteContext.m_bDeflate;
+    if( bInflate )
+    {
+        sal_uInt8* pInflated = nullptr;
+        sal_uInt32 nInflated = 0;
+        unzipToBuffer( pRaw.get(), nRaw, &pInflated, &nInflated );
+        rWriteContext.write( pInflated, nInflated );
+        std::free( pInflated );
+        return;
+    }
+    if( pRaw && nRaw )
+        rWriteContext.write( pRaw.get(), nRaw );
+}
+
+// Emits the object as "<number> <generation> obj ... endobj" and records
+// its output position in the xref table of the context, if there is one.
+//
+// If the context asks for deflating and/or decrypting and the object has
+// stream data, a clone of the object is emitted instead: its /Length is
+// set to the size of the processed data, a leading FlateDecode filter is
+// removed when the data was inflated, and the processed data is written
+// between "stream" and "endstream". In all other cases the sub elements
+// are emitted unchanged.
+//
+// Returns false as soon as a write fails.
+bool PDFObject::emit( EmitContext& rWriteContext ) const
+{
+    if( ! rWriteContext.write( "\n", 1 ) )
+        return false;
+
+    EmitImplData* pEData = getEmitData( rWriteContext );
+    if( pEData )
+        pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
+
+    OString aBuf =
+        OString::number( sal_Int32( m_nNumber ) ) +
+        " " +
+        OString::number( sal_Int32( m_nGeneration ) ) +
+        " obj\n";
+    if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
+        return false;
+
+    // strings and streams inside this object are decrypted with its number
+    if( pEData )
+        pEData->setDecryptObject( m_nNumber, m_nGeneration );
+    if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
+    {
+        std::unique_ptr<char[]> pStream;
+        unsigned int nBytes = 0;
+        bool bDeflate = getDeflatedStream( pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
+        if( pStream && nBytes )
+        {
+            // unzip the stream
+            sal_uInt8* pOutBytes = nullptr;
+            sal_uInt32 nOutBytes = 0;
+            const bool bInflate = bDeflate && rWriteContext.m_bDeflate;
+            if( bInflate )
+                unzipToBuffer( pStream.get(), nBytes, &pOutBytes, &nOutBytes );
+            else
+            {
+                // nothing to deflate, but decryption has happened
+                pOutBytes = reinterpret_cast<sal_uInt8*>(pStream.get());
+                nOutBytes = static_cast<sal_uInt32>(nBytes);
+            }
+
+            // clone this object; clone() only links stream and dictionary
+            // when it finds both among the sub elements, so check before
+            // using them
+            std::unique_ptr<PDFObject> pClone;
+            if( nOutBytes )
+                pClone.reset( static_cast<PDFObject*>(clone()) );
+            PDFDict* pCloneDict =
+                ( pClone && pClone->m_pStream ) ? pClone->m_pStream->m_pDict : nullptr;
+            if( pCloneDict )
+            {
+                // set length in the dictionary to new stream length
+                std::unique_ptr<PDFNumber> pNewLen(new PDFNumber( double(nOutBytes) ));
+                pCloneDict->insertValue( "Length", std::move(pNewLen) );
+
+                if( bInflate )
+                {
+                    // delete flatedecode filter
+                    const auto it = pCloneDict->m_aMap.find( "Filter" );
+                    if( it != pCloneDict->m_aMap.end() )
+                    {
+                        PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
+                        if (pFilter && pFilter->m_aName == "FlateDecode")
+                            pCloneDict->eraseValue( "Filter" );
+                        else
+                        {
+                            PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
+                            if( pArray && ! pArray->m_aSubElements.empty() )
+                            {
+                                pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front().get());
+                                if (pFilter && pFilter->m_aName == "FlateDecode")
+                                {
+                                    pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // write sub elements except stream
+                bool bRet = true;
+                unsigned int nEle = pClone->m_aSubElements.size();
+                for( unsigned int i = 0; i < nEle && bRet; i++ )
+                {
+                    if( pClone->m_aSubElements[i].get() != pClone->m_pStream )
+                        bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
+                }
+                pClone.reset();
+                // write stream
+                if( bRet )
+                    bRet = rWriteContext.write("stream\n", 7)
+                           && rWriteContext.write(pOutBytes, nOutBytes)
+                           && rWriteContext.write("\nendstream\nendobj\n", 18);
+                // pOutBytes is only owned here if it came from unzipToBuffer
+                if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
+                    std::free( pOutBytes );
+                pEData->setDecryptObject( 0, 0 );
+                return bRet;
+            }
+            if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream.get()) )
+                std::free( pOutBytes );
+        }
+    }
+
+    // plain emission of the unchanged sub elements
+    bool bRet = emitSubElements( rWriteContext ) &&
+                rWriteContext.write( "\nendobj\n", 8 );
+    if( pEData )
+        pEData->setDecryptObject( 0, 0 );
+    return bRet;
+}
+
+// Returns a newly allocated object with the same number and generation
+// and clones of all sub elements. m_pObject and m_pStream of the copy are
+// pointed at the clones of the elements they referred to in this object.
+// The stream is only linked if the object element precedes it, and its
+// dictionary pointer is only set if that object element is a dictionary.
+PDFEntry* PDFObject::clone() const
+{
+    // hold the copy in a smart pointer so it is freed again if cloning a
+    // sub element throws
+    std::unique_ptr<PDFObject> pNewOb( new PDFObject( m_nNumber, m_nGeneration ) );
+    cloneSubElements( pNewOb->m_aSubElements );
+    unsigned int nEle = m_aSubElements.size();
+    for( unsigned int i = 0; i < nEle; i++ )
+    {
+        if( m_aSubElements[i].get() == m_pObject )
+            pNewOb->m_pObject = pNewOb->m_aSubElements[i].get();
+        else if( m_aSubElements[i].get() == m_pStream && pNewOb->m_pObject )
+        {
+            pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i].get());
+            PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
+            if (pNewDict && pNewOb->m_pStream)
+                pNewOb->m_pStream->m_pDict = pNewDict;
+        }
+    }
+    return pNewOb.release();
+}
+
+// Defined out of line; no explicit cleanup is performed here.
+PDFTrailer::~PDFTrailer() = default;
+
+// Writes the cross reference section, the trailer and the end of file
+// marker:
+//   - "xref" with the free entry for object 0,
+//   - if the context carries xref data, one subsection per run of
+//     consecutive object numbers, each entry being written as 20 bytes,
+//   - "trailer" followed by the sub elements,
+//   - "startxref", the offset of the xref section and "%%EOF".
+// Returns false as soon as a write fails.
+bool PDFTrailer::emit( EmitContext& rWriteContext ) const
+{
+    // get xref offset
+    const unsigned int nXRefPos = rWriteContext.getCurPos();
+    // begin xref section, object 0 is always free
+    if( ! rWriteContext.write( "xref\r\n"
+                               "0 1\r\n"
+                               "0000000000 65535 f\r\n", 31 ) )
+        return false;
+    // check if we are emitting a complete PDF file
+    if( EmitImplData* pEData = getEmitData( rWriteContext ) )
+    {
+        // appends rDigits to rBuf, left padded with '0' to nWidth characters
+        const auto appendPadded = []( OStringBuffer& rBuf, const OString& rDigits, int nWidth )
+        {
+            for( int nPad = nWidth - rDigits.getLength(); nPad > 0; --nPad )
+                rBuf.append( '0' );
+            rBuf.append( rDigits );
+        };
+
+        // emit object xrefs
+        const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
+        EmitImplData::XRefTable::const_iterator aEntry = rXRefs.begin();
+        while( aEntry != rXRefs.end() )
+        {
+            // find end of continuous object numbers
+            EmitImplData::XRefTable::const_iterator aRunEnd = aEntry;
+            unsigned int nLast = aEntry->first;
+            for( ++aRunEnd; aRunEnd != rXRefs.end() && aRunEnd->first == nLast+1; ++aRunEnd )
+                nLast = aRunEnd->first;
+            // write first object number and number of following entries
+            OStringBuffer aBuf( 21 );
+            aBuf.append( sal_Int32( aEntry->first ) );
+            aBuf.append( ' ' );
+            aBuf.append( sal_Int32(nLast - aEntry->first + 1) );
+            aBuf.append( "\r\n" );
+            if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
+                return false;
+            for( ; aEntry != aRunEnd; ++aEntry )
+            {
+                // write 20 char entry of form
+                // 0000offset 00gen n\r\n
+                aBuf.setLength( 0 );
+                const OString aOffset( OString::number( aEntry->second.second ) );
+                appendPadded( aBuf, aOffset, 10 );
+                aBuf.append( ' ' );
+                const OString aGeneration( OString::number( aEntry->second.first ) );
+                appendPadded( aBuf, aGeneration, 5 );
+                aBuf.append( " n\r\n" );
+                if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
+                    return false;
+            }
+        }
+    }
+    if( ! rWriteContext.write( "trailer\n", 8 ) )
+        return false;
+    if( ! emitSubElements( rWriteContext ) )
+        return false;
+    if( ! rWriteContext.write( "startxref\n", 10 ) )
+        return false;
+    const OString aXRefOffset( OString::number( nXRefPos ) );
+    if( ! rWriteContext.write( aXRefOffset.getStr(), aXRefOffset.getLength() ) )
+        return false;
+    return rWriteContext.write( "\n%%EOF\n", 7 );
+}
+
+// Creates a copy of this trailer; the copy's m_pDict is set to the entry of
+// its own sub element list that sits where m_pDict sits in the original.
+PDFEntry* PDFTrailer::clone() const
+{
+    PDFTrailer* pCopy = new PDFTrailer();
+    cloneSubElements( pCopy->m_aSubElements );
+
+    // look for the position of the trailer dictionary among the originals
+    const unsigned int nCount = m_aSubElements.size();
+    unsigned int nIdx = 0;
+    while( nIdx < nCount && m_aSubElements[nIdx].get() != m_pDict )
+        ++nIdx;
+    if( nIdx < nCount )
+        pCopy->m_pDict = dynamic_cast<PDFDict*>(pCopy->m_aSubElements[nIdx].get());
+    return pCopy;
+}
+
+#define ENCRYPTION_KEY_LEN 16 // byte size of the key buffers (aKey, temporary keys) used by the password code below
+#define ENCRYPTION_BUF_LEN 32 // byte size of the scratch buffers: padded password, encrypted pad string, decrypted O entry
+
+namespace pdfparse {
+// Encryption related state of a PDFFile. PDFFile::impl_getData() creates it
+// on demand and fills it from the trailer's ID and Encrypt entries.
+struct PDFFileImplData
+{
+    bool        m_bIsEncrypted;         // an Encrypt dictionary with a Filter entry was found
+    bool        m_bStandardHandler;     // the Filter entry names the "Standard" handler
+    sal_uInt32  m_nAlgoVersion;         // value of the V entry
+    sal_uInt32  m_nStandardRevision;    // value of the R entry
+    sal_uInt32  m_nKeyLength;           // key length in bytes (Length entry / 8)
+    sal_uInt8   m_aOEntry[32] = {};     // O entry, taken over only if it is 32 bytes long
+    sal_uInt8   m_aUEntry[32] = {};     // U entry, taken over only if it is 32 bytes long
+    sal_uInt32  m_nPEntry;              // value of the P entry
+    OString     m_aDocID;               // first string of the trailer's ID array
+    rtlCipher   m_aCipher;              // ARCFOUR cipher handle, created on demand
+
+    sal_uInt8   m_aDecryptionKey[ENCRYPTION_KEY_LEN+5] = {}; // maximum handled key length
+
+    PDFFileImplData() :
+        m_bIsEncrypted( false ),
+        m_bStandardHandler( false ),
+        m_nAlgoVersion( 0 ),
+        m_nStandardRevision( 0 ),
+        m_nKeyLength( 0 ),
+        m_nPEntry( 0 ),
+        m_aCipher( nullptr )
+    {
+    }
+
+    // This struct owns the raw cipher handle and destroys it in its
+    // destructor; a copy would lead to the same handle being destroyed twice.
+    PDFFileImplData( const PDFFileImplData& ) = delete;
+    PDFFileImplData& operator=( const PDFFileImplData& ) = delete;
+
+    ~PDFFileImplData()
+    {
+        if( m_aCipher )
+            rtl_cipher_destroyARCFOUR( m_aCipher );
+    }
+};
+}
+
+// A new file starts with major and minor version 0; emit() writes these two
+// numbers into the %PDF- header line.
+PDFFile::PDFFile()
+    : m_nMajor( 0 )
+    , m_nMinor( 0 )
+{
+}
+
+// Defined here, after PDFFileImplData, and without any explicit cleanup.
+PDFFile::~PDFFile() = default;
+
+// Tells whether impl_getData() found an encryption dictionary; the data is
+// created by that call if it does not exist yet.
+bool PDFFile::isEncrypted() const
+{
+    const PDFFileImplData* pImpl = impl_getData();
+    return pImpl->m_bIsEncrypted;
+}
+
+// Decrypts nLen bytes from pInBuffer into pOutBuffer with ARCFOUR.
+// The cipher key is the MD5 sum of the stored decryption key extended by
+// three bytes of nObject and two bytes of nGeneration (low order byte first),
+// of which at most 16 bytes are used.
+// Returns false if the file is not encrypted, if the key length read from
+// the document does not fit the key buffer, or if the cipher reports an error.
+bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
+                       unsigned int nObject, unsigned int nGeneration ) const
+{
+    if( ! isEncrypted() )
+        return false;
+
+    // m_nKeyLength stems from the document's Length entry; refuse values for
+    // which the five bytes appended below would land outside m_aDecryptionKey
+    sal_uInt32 i = m_pData->m_nKeyLength;
+    if( i + 5 > sizeof( m_pData->m_aDecryptionKey ) || i > ENCRYPTION_KEY_LEN )
+        return false;
+
+    if( ! m_pData->m_aCipher )
+        m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
+
+    // modify encryption key: append object number and generation
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
+    m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
+
+    ::std::vector<unsigned char> const aSum(::comphelper::Hash::calculateHash(
+                m_pData->m_aDecryptionKey, i, ::comphelper::HashType::MD5));
+
+    // no more than 16 bytes of the hash are used as cipher key
+    if( i > 16 )
+        i = 16;
+
+    rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
+                                                  rtl_Cipher_DirectionDecode,
+                                                  aSum.data(), i,
+                                                  nullptr, 0 );
+    if( aErr == rtl_Cipher_E_None )
+        aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
+                                         pInBuffer, nLen,
+                                         pOutBuffer, nLen );
+    return aErr == rtl_Cipher_E_None;
+}
+
+const sal_uInt8 nPadString[32] = // fixed 32 byte pad string: fills up short passwords and is the input of the user password checks below
+{
+    0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
+    0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
+};
+
+// Fills the 32 bytes at pBuffer with rStr, cut off after 32 bytes; if rStr is
+// shorter, the remainder is taken from the beginning of nPadString.
+static void pad_or_truncate_to_32( const OString& rStr, char* pBuffer )
+{
+    const int nTotal = 32;
+    const int nAvail = rStr.getLength();
+    const int nTake = nAvail > nTotal ? nTotal : nAvail;
+    memcpy( pBuffer, rStr.getStr(), nTake );
+    // the pad string always starts at its first byte, whatever nTake is
+    if( nTake < nTotal )
+        memcpy( pBuffer + nTake, nPadString, nTotal - nTake );
+}
+
+// Derives a key from rPwd (see PDF reference 1.4 Algorithm 3.2) and copies it
+// to pOutKey. The number of bytes written is returned; it is m_nKeyLength but
+// never more than RTL_DIGEST_LENGTH_MD5, so pOutKey has to offer that much room.
+// With bComputeO set only the padded password is hashed; otherwise the O
+// entry, the P entry and the document id are hashed as well.
+static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData const * pData, bool bComputeO )
+{
+    // bring the password to exactly 32 bytes
+    char aPadded[ENCRYPTION_BUF_LEN];
+    pad_or_truncate_to_32( rPwd, aPadded );
+
+    ::comphelper::Hash aMD5(::comphelper::HashType::MD5);
+    aMD5.update(reinterpret_cast<unsigned char const*>(aPadded), sizeof(aPadded));
+    if( ! bComputeO )
+    {
+        aMD5.update(pData->m_aOEntry, 32);
+
+        // P entry as four bytes, low order byte first
+        const sal_uInt32 nP = pData->m_nPEntry;
+        sal_uInt8 aPBytes[4];
+        for( int nByte = 0; nByte < 4; ++nByte )
+            aPBytes[nByte] = static_cast<sal_uInt8>((nP >> (8 * nByte)) & 0xff);
+        aMD5.update(aPBytes, sizeof(aPBytes));
+
+        const OString& rDocID = pData->m_aDocID;
+        aMD5.update(reinterpret_cast<unsigned char const*>(rDocID.getStr()), rDocID.getLength());
+    }
+    ::std::vector<unsigned char> aHash(aMD5.finalize());
+
+    // revision 3 feeds the hash into MD5 another 50 times
+    if( pData->m_nStandardRevision == 3 )
+    {
+        int nRounds = 50;
+        while( nRounds-- > 0 )
+            aHash = ::comphelper::Hash::calculateHash(aHash.data(), aHash.size(),
+                        ::comphelper::HashType::MD5);
+    }
+
+    // the key is the beginning of the hash
+    sal_uInt32 nKeyBytes = RTL_DIGEST_LENGTH_MD5;
+    if( pData->m_nKeyLength < nKeyBytes )
+        nKeyBytes = pData->m_nKeyLength;
+    memcpy( pOutKey, aHash.data(), nKeyBytes );
+    return nKeyBytes;
+}
+
+// Checks whether rPwd is the user password (PDF reference 1.4 Algorithm 3.6).
+// The key derived from rPwd is stored in pData->m_aDecryptionKey in any case;
+// it is only meaningful if true is returned. pData->m_aCipher must have been
+// created by the caller. Returns false for a wrong password, for a cipher
+// error and for revisions other than 2 and 3.
+static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
+{
+    bool bValid = false;
+    // zero-initialised so that no byte is indeterminate if the key is
+    // shorter than the buffer
+    sal_uInt8 aKey[ENCRYPTION_KEY_LEN] = {};
+    const sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
+    // save (at this time potential) decryption key for later use
+    memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
+    if( pData->m_nStandardRevision == 2 )
+    {
+        sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN] = {};
+        // see PDF reference 1.4 Algorithm 3.4
+        // encrypt pad string
+        if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
+                                    aKey, nKeyLen,
+                                    nullptr, 0 )
+            != rtl_Cipher_E_None)
+        {
+            return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+        }
+        if (rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
+                                      nEncryptedEntry, sizeof( nEncryptedEntry ) )
+            != rtl_Cipher_E_None)
+        {
+            return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+        }
+        bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
+    }
+    else if( pData->m_nStandardRevision == 3 )
+    {
+        // see PDF reference 1.4 Algorithm 3.5
+        ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
+        aDigest.update(nPadString, sizeof(nPadString));
+        aDigest.update(reinterpret_cast<unsigned char const*>(pData->m_aDocID.getStr()), pData->m_aDocID.getLength());
+        ::std::vector<unsigned char> nEncryptedEntry(aDigest.finalize());
+        // The cipher key is the derived key, i.e. nKeyLen bytes of it; only
+        // those bytes were written by password_to_key().
+        if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
+                                    aKey, nKeyLen, nullptr, 0 )
+            != rtl_Cipher_E_None)
+        {
+            return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+        }
+        if (rtl_cipher_encodeARCFOUR( pData->m_aCipher,
+                                      nEncryptedEntry.data(), 16,
+                                      nEncryptedEntry.data(), 16 ) // encrypt in place
+            != rtl_Cipher_E_None)
+        {
+            return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+        }
+        for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
+        {
+            // key of this round: every key byte XORed with the round counter
+            sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN] = {};
+            for( sal_uInt32 j = 0; j < nKeyLen; j++ )
+                aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
+
+            if (rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
+                                        aTempKey, nKeyLen, nullptr, 0 )
+                != rtl_Cipher_E_None)
+            {
+                return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+            }
+            if (rtl_cipher_encodeARCFOUR( pData->m_aCipher,
+                                          nEncryptedEntry.data(), 16,
+                                          nEncryptedEntry.data(), 16 ) // encrypt in place
+                != rtl_Cipher_E_None)
+            {
+                return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+            }
+        }
+        // only the first 16 bytes of the U entry are significant here
+        bValid = (memcmp( nEncryptedEntry.data(), pData->m_aUEntry, 16 ) == 0);
+    }
+    return bValid;
+}
+
+// True for the standard security handler with algorithm version 1 or 2 and
+// revision 2 or 3. Reads m_pData directly, without creating it.
+bool PDFFile::usesSupportedEncryptionFormat() const
+{
+    const PDFFileImplData& rData = *m_pData;
+    if( !rData.m_bStandardHandler )
+        return false;
+    const bool bAlgoOk = rData.m_nAlgoVersion >= 1 && rData.m_nAlgoVersion <= 2;
+    const bool bRevisionOk = rData.m_nStandardRevision >= 2 && rData.m_nStandardRevision <= 3;
+    return bAlgoOk && bRevisionOk;
+}
+
+// Prepares decryption with the password rPwd.
+// - not encrypted: returns whether rPwd is empty
+// - unsupported encryption format: returns false
+// - otherwise rPwd is tried as user password first; if that fails it is
+//   treated as owner password, from which the user password is recovered
+//   (PDF reference 1.4 Algorithm 3.7) and checked.
+// Returns true if one of the checks succeeded.
+bool PDFFile::setupDecryptionData( const OString& rPwd ) const
+{
+    if( !impl_getData()->m_bIsEncrypted )
+        return rPwd.isEmpty();
+
+    // check if we can handle this encryption at all
+    if( ! usesSupportedEncryptionFormat() )
+        return false;
+
+    if( ! m_pData->m_aCipher )
+        m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
+
+    // first try user password
+    bool bValid = check_user_password( rPwd, m_pData.get() );
+
+    if( ! bValid )
+    {
+        // try owner password
+        // see PDF reference 1.4 Algorithm 3.7
+        // aKey is zero-initialised: password_to_key() writes nKeyLen bytes
+        // only, which may be fewer than the buffer holds
+        sal_uInt8 aKey[ENCRYPTION_KEY_LEN] = {};
+        sal_uInt8 nPwd[ENCRYPTION_BUF_LEN] = {};
+        const sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData.get(), true );
+        if( m_pData->m_nStandardRevision == 2 )
+        {
+            if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
+                                        aKey, nKeyLen, nullptr, 0 )
+                != rtl_Cipher_E_None)
+            {
+                return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+            }
+            rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
+                                      m_pData->m_aOEntry, 32,
+                                      nPwd, 32 );
+        }
+        else if( m_pData->m_nStandardRevision == 3 )
+        {
+            memcpy( nPwd, m_pData->m_aOEntry, 32 );
+            for( int i = 19; i >= 0; i-- )
+            {
+                // key of this round: every key byte XORed with the round
+                // counter; only the nKeyLen bytes that are used are touched
+                sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN] = {};
+                for( sal_uInt32 j = 0; j < nKeyLen; j++ )
+                    nTempKey[j] = sal_uInt8(aKey[j] ^ i);
+                if (rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
+                                            nTempKey, nKeyLen, nullptr, 0 )
+                    != rtl_Cipher_E_None)
+                {
+                    return false; //TODO: differentiate "failed to decrypt" from "wrong password"
+                }
+                rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
+                                          nPwd, 32,
+                                          nPwd, 32 ); // decrypt inplace
+            }
+        }
+        // nPwd now holds the candidate for the (padded) user password
+        bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData.get() );
+    }
+
+    return bValid;
+}
+
+// Returns the per-file data, creating it on the first call.
+// On creation the sub elements are searched from the last to the first for a
+// trailer with a dictionary. From such a trailer the first string of the ID
+// array is taken as document id, and the Encrypt entry (a dictionary, or a
+// reference to an object holding one) is evaluated: Filter, V, Length, O, U,
+// R and P. The search stops at the first trailer whose encryption dictionary
+// has a Filter entry.
+// All numbers are taken from the document and therefore range checked before
+// they are converted; the key length is limited to ENCRYPTION_KEY_LEN bytes.
+PDFFileImplData* PDFFile::impl_getData() const
+{
+    if( m_pData )
+        return m_pData.get();
+    m_pData.reset( new PDFFileImplData );
+
+    // Converting a double that is negative, NaN or too large for the target
+    // type is undefined behaviour, so such values are mapped to 0 instead.
+    const auto toUInt32 = []( double fValue ) -> sal_uInt32
+    {
+        if( !(fValue >= 0.0) || fValue >= 4294967296.0 )
+            return 0;
+        return static_cast<sal_uInt32>(fValue);
+    };
+
+    // check for encryption dict in a trailer
+    unsigned int nElements = m_aSubElements.size();
+    while( nElements-- > 0 )
+    {
+        PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements].get());
+        if( pTrailer && pTrailer->m_pDict )
+        {
+            // search doc id
+            PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
+            if( doc_id != pTrailer->m_pDict->m_aMap.end() )
+            {
+                PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
+                if( pArr && !pArr->m_aSubElements.empty() )
+                {
+                    PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0].get());
+                    if( pStr )
+                        m_pData->m_aDocID = pStr->getFilteredString();
+#if OSL_DEBUG_LEVEL > 0
+                    OUStringBuffer aTmp;
+                    for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
+                        aTmp.append(static_cast<sal_Int32>(sal_uInt8(m_pData->m_aDocID[i])), 16);
+                    SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << aTmp.makeStringAndClear() << ">");
+#endif
+                }
+            }
+            // search Encrypt entry
+            PDFDict::Map::iterator enc =
+                pTrailer->m_pDict->m_aMap.find( "Encrypt" );
+            if( enc != pTrailer->m_pDict->m_aMap.end() )
+            {
+                // the entry is either the dictionary itself or a reference
+                // to the object that contains it
+                PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
+                if( ! pDict )
+                {
+                    PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
+                    if( pRef )
+                    {
+                        PDFObject* pObj = findObject( pRef );
+                        if( pObj && pObj->m_pObject )
+                            pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
+                    }
+                }
+                if( pDict )
+                {
+                    PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
+                    PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
+                    PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
+                    PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
+                    PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
+                    PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
+                    PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
+                    if( filter != pDict->m_aMap.end() )
+                    {
+                        m_pData->m_bIsEncrypted = true;
+                        // default key length: 5 bytes, 16 bytes from
+                        // algorithm version 3 on; a Length entry overrides it
+                        m_pData->m_nKeyLength = 5;
+                        if( version != pDict->m_aMap.end() )
+                        {
+                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
+                            if( pNum )
+                                m_pData->m_nAlgoVersion = toUInt32(pNum->m_fValue);
+                        }
+                        if( m_pData->m_nAlgoVersion >= 3 )
+                            m_pData->m_nKeyLength = 16;
+                        if( len != pDict->m_aMap.end() )
+                        {
+                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
+                            if( pNum )
+                                m_pData->m_nKeyLength = toUInt32(pNum->m_fValue) / 8;
+                        }
+                        // m_aDecryptionKey has room for keys of up to
+                        // ENCRYPTION_KEY_LEN bytes plus the five bytes that
+                        // decrypt() appends; never let the length exceed that
+                        if( m_pData->m_nKeyLength > ENCRYPTION_KEY_LEN )
+                            m_pData->m_nKeyLength = ENCRYPTION_KEY_LEN;
+                        PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
+                        if( pFilter && pFilter->getFilteredName() == "Standard" )
+                            m_pData->m_bStandardHandler = true;
+                        if( o_ent != pDict->m_aMap.end() )
+                        {
+                            PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
+                            if( pString )
+                            {
+                                OString aEnt = pString->getFilteredString();
+                                if( aEnt.getLength() == 32 )
+                                    memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
+#if OSL_DEBUG_LEVEL > 0
+                                else
+                                {
+                                    OUStringBuffer aTmp;
+                                    for( int i = 0; i < aEnt.getLength(); i++ )
+                                        aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
+                                    SAL_WARN("sdext.pdfimport.pdfparse",
+                                             "O entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
+                                }
+#endif
+                            }
+                        }
+                        if( u_ent != pDict->m_aMap.end() )
+                        {
+                            PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
+                            if( pString )
+                            {
+                                OString aEnt = pString->getFilteredString();
+                                if( aEnt.getLength() == 32 )
+                                    memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
+#if OSL_DEBUG_LEVEL > 0
+                                else
+                                {
+                                    OUStringBuffer aTmp;
+                                    for( int i = 0; i < aEnt.getLength(); i++ )
+                                        aTmp.append(" " + OUString::number(sal_uInt8(aEnt[i]), 16));
+                                    SAL_WARN("sdext.pdfimport.pdfparse",
+                                             "U entry has length " << static_cast<int>(aEnt.getLength()) << ", should be 32 <" << aTmp.makeStringAndClear() << ">" );
+                                }
+#endif
+                            }
+                        }
+                        if( r_ent != pDict->m_aMap.end() )
+                        {
+                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
+                            if( pNum )
+                                m_pData->m_nStandardRevision = toUInt32(pNum->m_fValue);
+                        }
+                        if( p_ent != pDict->m_aMap.end() )
+                        {
+                            PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
+                            if( pNum )
+                            {
+                                // P is a 32 bit flag word that may be written
+                                // as a negative or as an unsigned number; both
+                                // forms end up as the same bit pattern. Values
+                                // outside of both ranges leave m_nPEntry at 0.
+                                const double fP = pNum->m_fValue;
+                                if( fP >= -2147483648.0 && fP < 4294967296.0 )
+                                    m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int64>(fP));
+                            }
+                            SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
+                        }
+
+                        SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? pFilter->getFilteredName() : OUString("<unknown>")) << ", version = " << static_cast<int>(m_pData->m_nAlgoVersion) << ", revision = " << static_cast<int>(m_pData->m_nStandardRevision) << ", key length = " << m_pData->m_nKeyLength );
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    return m_pData.get();
+}
+
+// Emits the whole file: new emit data for this file is attached to the
+// context, then the header line "%PDF-<major>.<minor>" and the sub elements
+// are written. Returns false if a write fails.
+bool PDFFile::emit( EmitContext& rWriteContext ) const
+{
+    setEmitData(  rWriteContext, new EmitImplData( this ) );
+
+    OStringBuffer aHeader( 16 );
+    aHeader.append( "%PDF-" );
+    aHeader.append( sal_Int32( m_nMajor ) );
+    aHeader.append( '.' );
+    aHeader.append( sal_Int32( m_nMinor ) );
+    aHeader.append( '\n' );
+    return rWriteContext.write( aHeader.getStr(), aHeader.getLength() )
+        && emitSubElements( rWriteContext );
+}
+
+// Creates a copy with the same version numbers; the copy's sub element list
+// is filled by cloneSubElements(). The per-file data is not copied.
+PDFEntry* PDFFile::clone() const
+{
+    PDFFile* pCopy = new PDFFile();
+    cloneSubElements( pCopy->m_aSubElements );
+    pCopy->m_nMinor = m_nMinor;
+    pCopy->m_nMajor = m_nMajor;
+    return pCopy;
+}
+
+// Out-of-line destructor; it performs no explicit cleanup of its own.
+PDFPart::~PDFPart() = default;
+
+// A part has no output of its own; emitting it means emitting its sub elements.
+bool PDFPart::emit( EmitContext& rWriteContext ) const
+{
+    const bool bWritten = emitSubElements( rWriteContext );
+    return bWritten;
+}
+
+// Creates a new part whose sub element list is filled by cloneSubElements().
+PDFEntry* PDFPart::clone() const
+{
+    PDFPart* pCopy = new PDFPart();
+    cloneSubElements( pCopy->m_aSubElements );
+    return pCopy;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/pdfparse/pdfparse.cxx b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
new file mode 100644
index 000000000..bbdb51c5a
--- /dev/null
+++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
@@ -0,0 +1,701 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <pdfparse.hxx>
+
+// boost using obsolete stuff
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable:4996)
+#pragma warning(disable:4503)
+#endif
+
+// workaround windows compiler: do not include multi_pass.hpp
+#include <boost/spirit/include/classic_core.hpp>
+#include <boost/spirit/include/classic_utility.hpp>
+#include <boost/spirit/include/classic_error_handling.hpp>
+#include <boost/spirit/include/classic_file_iterator.hpp>
+#include <boost/bind/bind.hpp>
+
+#include <string.h>
+
+#include <o3tl/safeint.hxx>
+#include <rtl/strbuf.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <sal/log.hxx>
+
+// disable warnings again because someone along the line has enabled them
+// (we have included boost headers, what did you expect?)
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable:4996)
+#pragma warning(disable:4503)
+#endif
+
+
+using namespace boost::spirit::classic;
+using namespace pdfparse;
+
+namespace {
+
+// EmitContext that collects everything written to it in a string buffer.
+// The "original" bytes that copyOrigBytes() / readOrigBytes() refer to are
+// the bytes collected in that same buffer so far.
+class StringEmitContext : public EmitContext
+{
+    OStringBuffer m_aBuf;
+
+    // True if the nLen bytes starting at nOffset lie completely within the
+    // bytes collected so far. Written without adding offset and length, so
+    // that huge values cannot wrap around and pass the check.
+    bool isValidRange( unsigned int nOffset, unsigned int nLen ) const noexcept
+    {
+        const unsigned int nAvail = o3tl::make_unsigned(m_aBuf.getLength());
+        return nOffset <= nAvail && nLen <= nAvail - nOffset;
+    }
+
+    public:
+    StringEmitContext() :  m_aBuf(256) {}
+
+    // appends nLen bytes from pBuf; always succeeds
+    virtual bool write( const void* pBuf, unsigned int nLen ) noexcept override
+    {
+        m_aBuf.append( static_cast<const char*>(pBuf), nLen );
+        return true;
+    }
+    virtual unsigned int getCurPos() noexcept override { return m_aBuf.getLength(); }
+    // appends a copy of nLen already collected bytes starting at nOrigOffset;
+    // returns false if that range is not completely available
+    virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) noexcept override
+    {
+        if( !isValidRange( nOrigOffset, nLen ) )
+            return false;
+        // Copy the bytes out first: appending may make the buffer reallocate
+        // its storage, which would leave a pointer into the old storage
+        // dangling while it is still being read from.
+        const OString aChunk( m_aBuf.getStr() + nOrigOffset, nLen );
+        return write( aChunk.getStr(), nLen );
+    }
+    // copies nLen already collected bytes starting at nOrigOffset to pBuf;
+    // returns the number of bytes copied, 0 if the range is not available
+    virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) noexcept override
+    {
+        if( !isValidRange( nOrigOffset, nLen ) )
+            return 0;
+        memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
+        return nLen;
+    }
+
+    // hands out the collected bytes and empties the buffer
+    OString getString() { return m_aBuf.makeStringAndClear(); }
+};
+
+template< class iteratorT >
+class PDFGrammar :  public grammar< PDFGrammar<iteratorT> >
+{
+public:
+
+    // remembers the iterator the input starts at; the number scratch value
+    // starts out as 0.0
+    explicit PDFGrammar( const iteratorT& first )
+        : m_fDouble( 0.0 )
+        , m_aGlobalBegin( first )
+    {
+    }
+    // deletes the first entry of the object stack, if there is one; no other
+    // stack entry is deleted here
+    ~PDFGrammar()
+    {
+        if( m_aObjectStack.empty() )
+            return;
+        delete m_aObjectStack.front();
+    }
+
+    double m_fDouble;
+    std::vector< unsigned int > m_aUIntStack;
+    std::vector< PDFEntry* >    m_aObjectStack;
+    OString                m_aErrorString;
+    iteratorT                   m_aGlobalBegin;
+
+public:
+    // Functor parser for the inside of a "(...)" string: consumes characters
+    // up to, but not including, the closing parenthesis that has no matching
+    // opening one. The character after a backslash is consumed without being
+    // interpreted. Yields the number of consumed characters, or -1 if the
+    // input ends first.
+    struct pdf_string_parser
+    {
+        typedef nil_t result_t;
+        template <typename ScannerT>
+        std::ptrdiff_t
+        operator()(ScannerT const& scan, result_t&) const
+        {
+            std::ptrdiff_t nConsumed = 0;
+            int nOpenParens = 0;
+
+            for( ; ! scan.at_end(); ++nConsumed, ++scan )
+            {
+                const char cCur = *scan;
+                if( cCur == '(' )
+                    ++nOpenParens;
+                else if( cCur == ')' )
+                {
+                    // an unbalanced closing parenthesis ends the string
+                    if( --nOpenParens < 0 )
+                        break;
+                }
+                else if( cCur == '\\' ) // ignore escaped braces
+                {
+                    // step over the backslash on the raw iterator
+                    ++nConsumed;
+                    ++scan.first;                 // tdf#63054: avoid skipping spaces
+                    if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces
+                        break;
+                }
+            }
+            return scan.at_end() ? -1 : nConsumed;
+        }
+    };
+
+    template< typename ScannerT >
+    struct definition
+    {
+        explicit definition( const PDFGrammar<iteratorT>& rSelf )
+        {
+            using namespace boost::placeholders;
+
+            PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
+
+            // workaround workshop compiler: comment_p doesn't work
+            // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
+            comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
+
+            boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
+
+            // workaround workshop compiler: confix_p doesn't work
+            //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
+            stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
+
+            name        = lexeme_d[
+                            ch_p('/')
+                            >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
+                               [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
+
+            // workaround workshop compiler: confix_p doesn't work
+            //stringtype  = ( confix_p("(",*anychar_p, ")") |
+            //                confix_p("<",*xdigit_p,  ">") )
+            //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
+
+            stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
+                            ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
+                          [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
+
+            null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
+
+            #ifdef USE_ASSIGN_ACTOR
+            objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
+                            >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                            >> ch_p('R')
+                            >> eps_p
+                          )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
+            #else
+            objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                            >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                            >> ch_p('R')
+                            >> eps_p
+                          )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
+            #endif
+
+            #ifdef USE_ASSIGN_ACTOR
+            simple_type = objectref | name |
+                          ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
+                          [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
+                          | stringtype | boolean | null_object;
+            #else
+            simple_type = objectref | name |
+                          ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
+                          [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
+                          | stringtype | boolean | null_object;
+            #endif
+
+            dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
+            dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
+
+            array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
+            array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
+
+            #ifdef USE_ASSIGN_ACTOR
+            object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
+                          >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                          >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
+            #else
+            object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                          >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                          >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
+            #endif
+            object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
+
+            xref        = str_p( "xref" ) >> uint_p >> uint_p
+                          >> lexeme_d[
+                                +( repeat_p(10)[digit_p]
+                                   >> blank_p
+                                   >> repeat_p(5)[digit_p]
+                                   >> blank_p
+                                   >> ( ch_p('n') | ch_p('f') )
+                                   >> repeat_p(2)[space_p]
+                                 ) ];
+
+            dict_element= dict_begin | comment | simple_type
+                          | array_begin | array_end | dict_end;
+
+            object      = object_begin
+                          >> *dict_element
+                          >> !stream
+                          >> object_end;
+
+            trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
+                          >> *dict_element
+                          >> str_p("startxref")
+                          >> uint_p
+                          >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
+
+            #ifdef USE_ASSIGN_ACTOR
+            pdfrule     = ! (lexeme_d[
+                                str_p( "%PDF-" )
+                                >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                                >> ch_p('.')
+                                >> uint_p[push_back_a(pSelf->m_aUIntStack)]
+                                >> *((~ch_p('\r') & ~ch_p('\n')))
+                                >> eol_p
+                             ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
+                          >> *( comment | object | ( xref >> trailer ) );
+            #else
+            pdfrule     = ! (lexeme_d[
+                                str_p( "%PDF-" )
+                                >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                                >> ch_p('.')
+                                >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
+                                >> *(~ch_p('\r') & ~ch_p('\n'))
+                                >> eol_p
+                             ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
+                          >> *( comment | object | ( xref >> trailer ) );
+            #endif
+        }
+        // Rule objects making up this grammar definition; pdfrule is the
+        // top-level rule and the one handed out by start().
+        rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
+                         objectref, array, value, dict_element, dict_begin, dict_end,
+                         array_begin, array_end, object, object_begin, object_end,
+                         xref, trailer, pdfrule;
+
+        // Returns the rule with which parsing of this grammar starts.
+        const rule< ScannerT >& start() const { return pdfrule; }
+    };
+
+    #ifndef USE_ASSIGN_ACTOR
+    // Semantic action used when USE_ASSIGN_ACTOR is not defined: appends a
+    // parsed unsigned integer to m_aUIntStack, from which later actions
+    // (haveFile, beginObject, pushObjectRef) pop their operands.
+    void push_back_action_uint( unsigned int i )
+    {
+        m_aUIntStack.push_back( i );
+    }
+    // Semantic action used when USE_ASSIGN_ACTOR is not defined: stores d in
+    // m_fDouble, which pushDouble() reads when it creates a PDFNumber.
+    void assign_action_double( double d )
+    {
+        m_fDouble = d;
+    }
+    #endif
+
+    // Reports a parse failure by handing the offending position and the
+    // message to throw_().
+    // NOTE(review): callers rely on this never returning (several of them
+    // dereference a pointer right after a failed check); throw_ is expected
+    // to raise the parser_error that PDFReader::read() catches - confirm.
+    static void parseError( const char* pMessage, iteratorT pLocation )
+    {
+        throw_( pLocation, pMessage );
+    }
+
+    // Copies the characters in the range [first, last) into a new OString.
+    OString iteratorToString( iteratorT first, iteratorT last ) const
+    {
+        OStringBuffer aBuffer( 32 );
+        for( ; first != last; ++first )
+            aBuffer.append( *first );
+        return aBuffer.makeStringAndClear();
+    }
+
+    // Semantic action for the "%PDF-major.minor" header: creates the PDFFile
+    // root entry and makes it the bottom element of the object stack.
+    // The two version numbers are taken from m_aUIntStack (minor on top).
+    // Raises a parse error at pBegin if the object stack is not empty.
+    void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
+    {
+        if( ! m_aObjectStack.empty() )
+        {
+            parseError( "found file header in unusual place", pBegin );
+            return;
+        }
+
+        PDFFile* pNewFile = new PDFFile();
+
+        // the minor version was pushed last, so it is popped first
+        pNewFile->m_nMinor = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+
+        pNewFile->m_nMajor = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+
+        m_aObjectStack.push_back( pNewFile );
+    }
+
+    // Semantic action for a comment: wraps the text in [first, last) into a
+    // PDFComment and appends it to the container on top of the object stack.
+    // If the stack is still empty a PDFPart is pushed first to act as
+    // container. Raises a parse error at first if the top of the stack is not
+    // a PDFContainer.
+    void pushComment( iteratorT first, iteratorT last )
+    {
+        // hold the comment in a unique_ptr so it is released again should
+        // parseError() throw before the container has taken ownership
+        std::unique_ptr<PDFComment> pComment =
+            std::make_unique<PDFComment>(iteratorToString(first,last));
+        if( m_aObjectStack.empty() )
+            m_aObjectStack.push_back( new PDFPart() );
+        PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+        if( pContainer == nullptr )
+            parseError( "comment without container", first );
+        else
+            pContainer->m_aSubElements.emplace_back( std::move(pComment) );
+    }
+
+    // Attaches a freshly parsed value to the container on top of the object
+    // stack, or raises a parse error at pPos if it may not be placed there.
+    //
+    // pNewValue  the entry to insert; on success ownership moves into the
+    //            container's m_aSubElements
+    // pPos       input position reported if the insertion is rejected
+    void insertNewValue( std::unique_ptr<PDFEntry> pNewValue, iteratorT pPos )
+    {
+        PDFContainer* pContainer = nullptr;
+        const char* pMsg = nullptr;
+        if( ! m_aObjectStack.empty() )
+        {
+            pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+            if (pContainer)
+            {
+                // dictionaries and arrays accept any value; every other
+                // container kind is restricted by the checks below
+                if( dynamic_cast<PDFDict*>(pContainer) == nullptr &&
+                    dynamic_cast<PDFArray*>(pContainer) == nullptr )
+                {
+                    PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
+                    if( pObj )
+                    {
+                        // an object takes exactly one value; m_pObject is
+                        // only an alias, the entry itself is stored in
+                        // m_aSubElements further down
+                        if( pObj->m_pObject == nullptr )
+                            pObj->m_pObject = pNewValue.get();
+                        else
+                        {
+                            pMsg = "second value for object";
+                            pContainer = nullptr;
+                        }
+                    }
+                    else if( dynamic_cast<PDFDict*>(pNewValue.get()) )
+                    {
+                        // a dictionary outside of an object is only accepted
+                        // as the first dictionary of a trailer
+                        PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
+                        if( pTrailer )
+                        {
+                            if( pTrailer->m_pDict == nullptr )
+                                pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue.get());
+                            else
+                                pContainer = nullptr;
+                        }
+                        else
+                            pContainer = nullptr;
+                    }
+                    else
+                        pContainer = nullptr;
+                }
+            }
+        }
+        // a null pContainer at this point means the value was rejected
+        if( pContainer )
+            pContainer->m_aSubElements.emplace_back( std::move(pNewValue) );
+        else
+        {
+            // no specific message was set above: pick one depending on
+            // whether the rejected value is itself a container
+            if( ! pMsg )
+            {
+                if( dynamic_cast<PDFContainer*>(pNewValue.get()) )
+                    pMsg = "array without container";
+                else
+                    pMsg = "value without container";
+            }
+            parseError( pMsg, pPos );
+        }
+    }
+
+    // Semantic action for a name: inserts a PDFName built from the text in
+    // [first, last) into the current container.
+    void pushName( iteratorT first, iteratorT last )
+    {
+        auto pName = std::make_unique<PDFName>(iteratorToString(first,last));
+        insertNewValue( std::move(pName), first );
+    }
+
+    // Semantic action for a number: inserts a PDFNumber holding the value
+    // currently stored in m_fDouble into the current container.
+    void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
+    {
+        auto pNumber = std::make_unique<PDFNumber>(m_fDouble);
+        insertNewValue( std::move(pNumber), first );
+    }
+
+    // Semantic action for a string: inserts a PDFString built from the text
+    // in [first, last) into the current container.
+    void pushString( iteratorT first, iteratorT last )
+    {
+        auto pString = std::make_unique<PDFString>(iteratorToString(first,last));
+        insertNewValue( std::move(pString), first );
+    }
+
+    // Semantic action for a boolean: inserts a PDFBool into the current
+    // container. The value is true exactly when the matched text is four
+    // characters long (presumably the token "true" as opposed to "false" -
+    // the boolean rule itself is defined elsewhere).
+    void pushBool( iteratorT first, iteratorT last )
+    {
+        const bool bValue = ( last-first == 4 );
+        insertNewValue( std::make_unique<PDFBool>( bValue ), first );
+    }
+
+    // Semantic action for the null object: inserts a PDFNull into the
+    // current container.
+    void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        auto pNull = std::make_unique<PDFNull>();
+        insertNewValue( std::move(pNull), first );
+    }
+
+
+    // Semantic action for "<number> <generation> obj": creates a PDFObject,
+    // appends it to the PDFFile/PDFPart on top of the object stack and makes
+    // it the new top of the stack.
+    // Object number and generation are taken from m_aUIntStack (generation on
+    // top). If the stack is still empty a PDFPart is pushed first.
+    // Raises a parse error at first if the top of the stack is neither a
+    // PDFFile nor a PDFPart.
+    void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
+    {
+        if( m_aObjectStack.empty() )
+            m_aObjectStack.push_back( new PDFPart() );
+
+        unsigned int nGeneration = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+        unsigned int nObject = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+
+        // keep the object in a unique_ptr until the container owns it, so it
+        // is not leaked when parseError() throws below
+        std::unique_ptr<PDFObject> pObj = std::make_unique<PDFObject>( nObject, nGeneration );
+        pObj->m_nOffset = first - m_aGlobalBegin;
+
+        PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+        if( pContainer &&
+            ( dynamic_cast<PDFFile*>(pContainer) ||
+              dynamic_cast<PDFPart*>(pContainer) ) )
+        {
+            // the stack only keeps a non-owning pointer
+            PDFObject* pRawObj = pObj.get();
+            pContainer->m_aSubElements.emplace_back( std::move(pObj) );
+            m_aObjectStack.push_back( pRawObj );
+        }
+        else
+            parseError( "object in wrong place", first );
+    }
+
+    // Semantic action for "endobj": removes the PDFObject from the top of
+    // the object stack. Raises a parse error at first if the stack is empty
+    // or its top element is not a PDFObject.
+    void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+        {
+            parseError( "endobj without obj", first );
+            return;
+        }
+        if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == nullptr )
+        {
+            parseError( "spurious endobj", first );
+            return;
+        }
+        m_aObjectStack.pop_back();
+    }
+
+    // Semantic action for an object reference: inserts a PDFObjectRef into
+    // the current container. Object number and generation are taken from
+    // m_aUIntStack (generation on top).
+    void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        const unsigned int nRefGeneration = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+        const unsigned int nRefObject = m_aUIntStack.back();
+        m_aUIntStack.pop_back();
+
+        auto pRef = std::make_unique<PDFObjectRef>(nRefObject,nRefGeneration);
+        insertNewValue( std::move(pRef), first );
+    }
+
+    // Semantic action for the start of a dictionary: creates a PDFDict,
+    // inserts it into the current container and makes it the new top of the
+    // object stack.
+    void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        std::unique_ptr<PDFEntry> pOwner( new PDFDict() );
+        PDFDict* pNewDict = static_cast<PDFDict*>(pOwner.get());
+        pNewDict->m_nOffset = first - m_aGlobalBegin;
+
+        // insertNewValue() throws if the dictionary is not allowed here, in
+        // which case the stack is left untouched
+        insertNewValue( std::move(pOwner), first );
+        m_aObjectStack.push_back( pNewDict );
+    }
+    // Semantic action for the end of a dictionary: removes the PDFDict from
+    // the top of the object stack and calls buildMap() on it.
+    // Raises a parse error at first if the stack is empty, if its top element
+    // is not a PDFDict, or if buildMap() returns an offending element.
+    void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        PDFDict* pDict = nullptr;
+        if( m_aObjectStack.empty() )
+            parseError( "dictionary end without begin", first );
+        else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == nullptr )
+            parseError( "spurious dictionary end", first );
+        else
+            m_aObjectStack.pop_back();
+
+        // pDict is non-null here as long as parseError() does not return
+        PDFEntry* pOffender = pDict->buildMap();
+        if( pOffender )
+        {
+            StringEmitContext aCtx;
+            // 30 is the length of the prefix string
+            aCtx.write( "offending dictionary element: ", 30 );
+            pOffender->emit( aCtx );
+            // the message is kept in a member - presumably so that the
+            // character data handed to parseError() outlives this function
+            // (TODO confirm)
+            m_aErrorString = aCtx.getString();
+            parseError( m_aErrorString.getStr(), first );
+        }
+    }
+
+    // Semantic action for the start of an array: creates a PDFArray, inserts
+    // it into the current container and makes it the new top of the object
+    // stack.
+    void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        std::unique_ptr<PDFEntry> pOwner( new PDFArray() );
+        PDFArray* pNewArray = static_cast<PDFArray*>(pOwner.get());
+        pNewArray->m_nOffset = first - m_aGlobalBegin;
+
+        // insertNewValue() throws if the array is not allowed here, in which
+        // case the stack is left untouched
+        insertNewValue( std::move(pOwner), first );
+        m_aObjectStack.push_back( pNewArray );
+    }
+
+    // Semantic action for the end of an array: removes the PDFArray from the
+    // top of the object stack. Raises a parse error at first if the stack is
+    // empty or its top element is not a PDFArray.
+    void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+        {
+            parseError( "array end without begin", first );
+            return;
+        }
+        if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == nullptr )
+        {
+            parseError( "spurious array end", first );
+            return;
+        }
+        m_aObjectStack.pop_back();
+    }
+
+    // Semantic action for a stream: attaches a PDFStream covering the input
+    // range [first, last) (stored as offsets relative to m_aGlobalBegin) to
+    // the PDFObject on top of the object stack.
+    // Raises a parse error at first if there is no object with a value on top
+    // of the stack or if the object already has a stream. If the object's
+    // value is not a dictionary, nothing is attached.
+    void emitStream( iteratorT first, iteratorT last )
+    {
+        if( m_aObjectStack.empty() )
+            parseError( "stream without object", first );
+
+        PDFObject* pOwner = dynamic_cast<PDFObject*>(m_aObjectStack.back());
+        if( !pOwner || !pOwner->m_pObject )
+        {
+            parseError( "stream without object", first );
+            return;
+        }
+
+        if( pOwner->m_pStream )
+            parseError( "multiple streams in object", first );
+
+        PDFDict* pStreamDict = dynamic_cast<PDFDict*>(pOwner->m_pObject);
+        if( !pStreamDict )
+            return;
+
+        PDFStream* pNewStream =
+            new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pStreamDict );
+        pOwner->m_pStream = pNewStream;
+        pOwner->m_aSubElements.emplace_back( pNewStream );
+    }
+
+    // Semantic action for "trailer": creates a PDFTrailer, appends it to the
+    // PDFFile/PDFPart on top of the object stack and makes it the new top of
+    // the stack. If the stack is still empty a PDFPart is pushed first.
+    // Raises a parse error at first if the top of the stack is neither a
+    // PDFFile nor a PDFPart.
+    void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+            m_aObjectStack.push_back( new PDFPart() );
+
+        // keep the trailer in a unique_ptr until the container owns it, so
+        // it is not leaked when parseError() throws below
+        std::unique_ptr<PDFTrailer> pTrailer = std::make_unique<PDFTrailer>();
+        pTrailer->m_nOffset = first - m_aGlobalBegin;
+
+        PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
+        if( pContainer &&
+            ( dynamic_cast<PDFFile*>(pContainer) ||
+              dynamic_cast<PDFPart*>(pContainer) ) )
+        {
+            // the stack only keeps a non-owning pointer
+            PDFTrailer* pRawTrailer = pTrailer.get();
+            pContainer->m_aSubElements.emplace_back( std::move(pTrailer) );
+            m_aObjectStack.push_back( pRawTrailer );
+        }
+        else
+            parseError( "trailer in wrong place", first );
+    }
+
+    // Semantic action for "%%EOF": removes the PDFTrailer from the top of the
+    // object stack. Raises a parse error at first if the stack is empty or
+    // its top element is not a PDFTrailer.
+    void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
+    {
+        if( m_aObjectStack.empty() )
+        {
+            parseError( "%%EOF without trailer", first );
+            return;
+        }
+        if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == nullptr )
+        {
+            parseError( "spurious %%EOF", first );
+            return;
+        }
+        m_aObjectStack.pop_back();
+    }
+};
+
+}
+
+#ifdef _WIN32
+// Parses the nLen bytes starting at pBuffer as a PDF file.
+// Returns the root entry if the parse left exactly one element on the
+// grammar's object stack, otherwise a null pointer. Parse errors are caught
+// here and only logged (debug builds).
+// The buffer is not required to be null-terminated.
+std::unique_ptr<PDFEntry> PDFReader::read( const char* pBuffer, unsigned int nLen )
+{
+    PDFGrammar<const char*> aGrammar( pBuffer );
+
+    try
+    {
+#if OSL_DEBUG_LEVEL > 0
+        boost::spirit::classic::parse_info<const char*> aInfo =
+#endif
+            boost::spirit::classic::parse( pBuffer,
+                                  pBuffer+nLen,
+                                  aGrammar,
+                                  boost::spirit::classic::space_p );
+#if OSL_DEBUG_LEVEL > 0
+        // log the positions as addresses: streaming a const char* would print
+        // it as a C string and read past the end of the unterminated buffer
+        SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << static_cast<const void*>(aInfo.stop) << " (buff=" << static_cast<const void*>(pBuffer) << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << static_cast<int>(aInfo.length) );
+#endif
+    }
+    catch( const parser_error<const char*, const char*>& rError )
+    {
+#if OSL_DEBUG_LEVEL > 0
+        // list the dynamic types still on the object stack
+        OString aTmp;
+        unsigned int nElem = aGrammar.m_aObjectStack.size();
+        for( unsigned int i = 0; i < nElem; i++ )
+            aTmp += OString::Concat("   ") + typeid( *(aGrammar.m_aObjectStack[i]) ).name();
+
+        SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
+#else
+        (void)rError;
+#endif
+    }
+
+    // only a stack holding exactly one element yields a result; that element
+    // is taken off the stack and handed to the caller
+    std::unique_ptr<PDFEntry> pRet;
+    unsigned int nEntries = aGrammar.m_aObjectStack.size();
+    if( nEntries == 1 )
+    {
+        pRet.reset(aGrammar.m_aObjectStack.back());
+        aGrammar.m_aObjectStack.pop_back();
+    }
+#if OSL_DEBUG_LEVEL > 0
+    else if( nEntries > 1 )
+        SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
+#endif
+
+    return pRet;
+}
+#endif
+
+// Parses the PDF file named pFileName.
+// Returns the root entry if the file could be opened and the parse left
+// exactly one element on the grammar's object stack, otherwise a null
+// pointer. Parse errors are caught here and only logged.
+std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
+{
+#ifdef _WIN32
+    /* #i106583#
+       since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
+       C++ stdlib istream_iterator does not allow "-" apparently
+       using spirit 2.0 doesn't work in our environment with the MSC
+
+       So for the time being bite the bullet and read the whole file.
+       FIXME: give Spirit 2.x another try when we upgrade boost again.
+    */
+    std::unique_ptr<PDFEntry> pRet;
+    FILE* fp = fopen( pFileName, "rb" );
+    if( fp )
+    {
+        // determine the file size; ftell() reports failure as -1, which must
+        // not be turned into a (huge) unsigned length
+        long nPos = -1;
+        if( fseek( fp, 0, SEEK_END ) == 0 )
+            nPos = ftell( fp );
+        if( nPos > 0 && fseek( fp, 0, SEEK_SET ) == 0 )
+        {
+            unsigned int nLen = static_cast<unsigned int>(nPos);
+            char* pBuf = static_cast<char*>(std::malloc( nLen ));
+            if( pBuf )
+            {
+                // parse only the bytes that were really read, so that a
+                // short read never exposes uninitialized memory
+                const size_t nRead = fread( pBuf, 1, nLen, fp );
+                if( nRead > 0 )
+                    pRet = read( pBuf, static_cast<unsigned int>(nRead) );
+                std::free( pBuf );
+            }
+        }
+        fclose( fp );
+    }
+    return pRet;
+#else
+    file_iterator<> file_start( pFileName );
+    if( ! file_start )
+        return nullptr;
+    file_iterator<> file_end = file_start.make_end();
+    PDFGrammar< file_iterator<> > aGrammar( file_start );
+
+    try
+    {
+#if OSL_DEBUG_LEVEL > 0
+        boost::spirit::classic::parse_info< file_iterator<> > aInfo =
+#endif
+            boost::spirit::classic::parse( file_start,
+                                  file_end,
+                                  aGrammar,
+                                  boost::spirit::classic::space_p );
+#if OSL_DEBUG_LEVEL > 0
+        SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
+#endif
+    }
+    catch( const parser_error< const char*, file_iterator<> >& rError )
+    {
+        SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
+#if OSL_DEBUG_LEVEL > 0
+        // list the dynamic types still on the object stack
+        OUStringBuffer aTmp;
+        unsigned int nElem = aGrammar.m_aObjectStack.size();
+        for( unsigned int i = 0; i < nElem; i++ )
+        {
+            aTmp.append("   ");
+            aTmp.appendAscii(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
+        }
+        SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp.makeStringAndClear());
+#endif
+    }
+
+    // only a stack holding exactly one element yields a result; that element
+    // is taken off the stack and handed to the caller
+    std::unique_ptr<PDFEntry> pRet;
+    unsigned int nEntries = aGrammar.m_aObjectStack.size();
+    if( nEntries == 1 )
+    {
+        pRet.reset(aGrammar.m_aObjectStack.back());
+        aGrammar.m_aObjectStack.pop_back();
+    }
+#if OSL_DEBUG_LEVEL > 0
+    else if( nEntries > 1 )
+    {
+        SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
+        for( unsigned int i = 0; i < nEntries; i++ )
+        {
+            SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
+            PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
+            if( pObj )
+                SAL_WARN("sdext.pdfimport.pdfparse", "   -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
+            else
+                SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
+        }
+    }
+#endif
+    return pRet;
+#endif // WIN32
+}
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:06:44 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:06:44 +0000
commit	ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree	7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /sdext/source/pdfimport/pdfparse
parent	Initial commit. (diff)
download	libreoffice-cb75148ebd0135178ff46f89a30139c44f8d2040.tar.xz libreoffice-cb75148ebd0135178ff46f89a30139c44f8d2040.zip