diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
commit | 940b4d1848e8c70ab7642901a68594e8016caffc (patch) | |
tree | eb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /sdext/source/pdfimport/pdfparse/pdfparse.cxx | |
parent | Initial commit. (diff) | |
download | libreoffice-upstream.tar.xz libreoffice-upstream.zip |
Adding upstream version 1:7.0.4.upstream/1%7.0.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sdext/source/pdfimport/pdfparse/pdfparse.cxx')
-rw-r--r-- | sdext/source/pdfimport/pdfparse/pdfparse.cxx | 699 |
1 files changed, 699 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/pdfparse/pdfparse.cxx b/sdext/source/pdfimport/pdfparse/pdfparse.cxx new file mode 100644 index 000000000..e61d90088 --- /dev/null +++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx @@ -0,0 +1,699 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +#include <pdfparse.hxx> + +// boost using obsolete stuff +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable:4996) +#pragma warning(disable:4503) +#endif + +// workaround windows compiler: do not include multi_pass.hpp +#include <boost/spirit/include/classic_core.hpp> +#include <boost/spirit/include/classic_utility.hpp> +#include <boost/spirit/include/classic_error_handling.hpp> +#include <boost/spirit/include/classic_file_iterator.hpp> +#include <boost/bind.hpp> + +#include <string.h> + +#include <o3tl/safeint.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/ustrbuf.hxx> +#include <sal/log.hxx> + +// disable warnings again because someone along the line has enabled them +// (we have included boost headers, what did you expect?) +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable:4996) +#pragma warning(disable:4503) +#endif + + +using namespace boost::spirit::classic; +using namespace pdfparse; + +namespace { + +class StringEmitContext : public EmitContext +{ + OStringBuffer m_aBuf; + public: + StringEmitContext() : EmitContext(), m_aBuf(256) {} + + virtual bool write( const void* pBuf, unsigned int nLen ) throw() override + { + m_aBuf.append( static_cast<const char*>(pBuf), nLen ); + return true; + } + virtual unsigned int getCurPos() throw() override { return m_aBuf.getLength(); } + virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() override + { return (nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) ) && + write( m_aBuf.getStr() + nOrigOffset, nLen ); } + virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() override + { + if( nOrigOffset+nLen < o3tl::make_unsigned(m_aBuf.getLength()) ) + { + memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen ); + return nLen; + } + return 0; + } + + OString getString() { return m_aBuf.makeStringAndClear(); } +}; + +template< class iteratorT > +class PDFGrammar : public grammar< PDFGrammar<iteratorT> > +{ +public: + + explicit PDFGrammar( const iteratorT& first ) + : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {} + ~PDFGrammar() + { + if( !m_aObjectStack.empty() ) + delete m_aObjectStack.front(); + } + + double m_fDouble; + std::vector< unsigned int > m_aUIntStack; + std::vector< PDFEntry* > m_aObjectStack; + OString m_aErrorString; + iteratorT m_aGlobalBegin; + +public: + struct pdf_string_parser + { + typedef nil_t result_t; + template <typename ScannerT> + std::ptrdiff_t + operator()(ScannerT const& scan, result_t&) const + { + std::ptrdiff_t len = 0; + + int nBraceLevel = 0; + while( ! scan.at_end() ) + { + char c = *scan; + if( c == ')' ) + { + nBraceLevel--; + if( nBraceLevel < 0 ) + break; + } + else if( c == '(' ) + nBraceLevel++; + else if( c == '\\' ) // ignore escaped braces + { + ++len; + ++scan.first; // tdf#63054: avoid skipping spaces + if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces + break; + } + ++len; + ++scan; + } + return scan.at_end() ? -1 : len; + } + }; + + template< typename ScannerT > + struct definition + { + explicit definition( const PDFGrammar<iteratorT>& rSelf ) + { + PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf ); + + // workaround workshop compiler: comment_p doesn't work + // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )]; + comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ]; + + boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)]; + + // workaround workshop compiler: confix_p doesn't work + //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )]; + stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )]; + + name = lexeme_d[ + ch_p('/') + >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0'))) + [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ]; + + // workaround workshop compiler: confix_p doesn't work + //stringtype = ( confix_p("(",*anychar_p, ")") | + // confix_p("<",*xdigit_p, ">") ) + // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)]; + + stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) | + ( ch_p('<') >> *xdigit_p >> ch_p('>') ) ) + [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)]; + + null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)]; + + #ifdef USE_ASSIGN_ACTOR + objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)] + >> uint_p[push_back_a(pSelf->m_aUIntStack)] + >> ch_p('R') + >> eps_p + )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)]; + #else + objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] + >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] + >> ch_p('R') + >> eps_p + )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)]; + #endif + + #ifdef USE_ASSIGN_ACTOR + simple_type = objectref | name | + ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p ) + [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)] + | stringtype | boolean | null_object; + #else + simple_type = objectref | name | + ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p ) + [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)] + | stringtype | boolean | null_object; + #endif + + dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)]; + dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)]; + + array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)]; + array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)]; + + #ifdef USE_ASSIGN_ACTOR + object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)] + >> uint_p[push_back_a(pSelf->m_aUIntStack)] + >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)]; + #else + object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] + >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] + >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)]; + #endif + object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)]; + + xref = str_p( "xref" ) >> uint_p >> uint_p + >> lexeme_d[ + +( repeat_p(10)[digit_p] + >> blank_p + >> repeat_p(5)[digit_p] + >> blank_p + >> ( ch_p('n') | ch_p('f') ) + >> repeat_p(2)[space_p] + ) ]; + + dict_element= dict_begin | comment | simple_type + | array_begin | array_end | dict_end; + + object = object_begin + >> *dict_element + >> !stream + >> object_end; + + trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)] + >> *dict_element + >> str_p("startxref") + >> uint_p + >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)]; + + #ifdef USE_ASSIGN_ACTOR + pdfrule = ! (lexeme_d[ + str_p( "%PDF-" ) + >> uint_p[push_back_a(pSelf->m_aUIntStack)] + >> ch_p('.') + >> uint_p[push_back_a(pSelf->m_aUIntStack)] + >> *((~ch_p('\r') & ~ch_p('\n'))) + >> eol_p + ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)] + >> *( comment | object | ( xref >> trailer ) ); + #else + pdfrule = ! (lexeme_d[ + str_p( "%PDF-" ) + >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] + >> ch_p('.') + >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] + >> *(~ch_p('\r') & ~ch_p('\n')) + >> eol_p + ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)] + >> *( comment | object | ( xref >> trailer ) ); + #endif + } + rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type, + objectref, array, value, dict_element, dict_begin, dict_end, + array_begin, array_end, object, object_begin, object_end, + xref, trailer, pdfrule; + + const rule< ScannerT >& start() const { return pdfrule; } + }; + + #ifndef USE_ASSIGN_ACTOR + void push_back_action_uint( unsigned int i ) + { + m_aUIntStack.push_back( i ); + } + void assign_action_double( double d ) + { + m_fDouble = d; + } + #endif + + static void parseError( const char* pMessage, iteratorT pLocation ) + { + throw_( pLocation, pMessage ); + } + + OString iteratorToString( iteratorT first, iteratorT last ) const + { + OStringBuffer aStr( 32 ); + while( first != last ) + { + aStr.append( *first ); + ++first; + } + return aStr.makeStringAndClear(); + } + + void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ ) + { + if( m_aObjectStack.empty() ) + { + PDFFile* pFile = new PDFFile(); + pFile->m_nMinor = m_aUIntStack.back(); + m_aUIntStack.pop_back(); + pFile->m_nMajor = m_aUIntStack.back(); + m_aUIntStack.pop_back(); + m_aObjectStack.push_back( pFile ); + } + else + parseError( "found file header in unusual place", pBegin ); + } + + void pushComment( iteratorT first, iteratorT last ) + { + // add a comment to the current stack element + PDFComment* pComment = + new PDFComment(iteratorToString(first,last)); + if( m_aObjectStack.empty() ) + m_aObjectStack.push_back( new PDFPart() ); + PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); + if( pContainer == nullptr ) + parseError( "comment without container", first ); + pContainer->m_aSubElements.emplace_back( pComment ); + } + + void insertNewValue( std::unique_ptr<PDFEntry> pNewValue, iteratorT pPos ) + { + PDFContainer* pContainer = nullptr; + const char* pMsg = nullptr; + if( ! m_aObjectStack.empty() ) + { + pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); + if (pContainer) + { + if( dynamic_cast<PDFDict*>(pContainer) == nullptr && + dynamic_cast<PDFArray*>(pContainer) == nullptr ) + { + PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer); + if( pObj ) + { + if( pObj->m_pObject == nullptr ) + pObj->m_pObject = pNewValue.get(); + else + { + pMsg = "second value for object"; + pContainer = nullptr; + } + } + else if( dynamic_cast<PDFDict*>(pNewValue.get()) ) + { + PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer); + if( pTrailer ) + { + if( pTrailer->m_pDict == nullptr ) + pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue.get()); + else + pContainer = nullptr; + } + else + pContainer = nullptr; + } + else + pContainer = nullptr; + } + } + } + if( pContainer ) + pContainer->m_aSubElements.emplace_back( std::move(pNewValue) ); + else + { + if( ! pMsg ) + { + if( dynamic_cast<PDFContainer*>(pNewValue.get()) ) + pMsg = "array without container"; + else + pMsg = "value without container"; + } + parseError( pMsg, pPos ); + } + } + + void pushName( iteratorT first, iteratorT last ) + { + insertNewValue( std::make_unique<PDFName>(iteratorToString(first,last)), first ); + } + + void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ ) + { + insertNewValue( std::make_unique<PDFNumber>(m_fDouble), first ); + } + + void pushString( iteratorT first, iteratorT last ) + { + insertNewValue( std::make_unique<PDFString>(iteratorToString(first,last)), first ); + } + + void pushBool( iteratorT first, iteratorT last ) + { + insertNewValue( std::make_unique<PDFBool>( last-first == 4 ), first ); + } + + void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + insertNewValue( std::make_unique<PDFNull>(), first ); + } + + + void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ ) + { + if( m_aObjectStack.empty() ) + m_aObjectStack.push_back( new PDFPart() ); + + unsigned int nGeneration = m_aUIntStack.back(); + m_aUIntStack.pop_back(); + unsigned int nObject = m_aUIntStack.back(); + m_aUIntStack.pop_back(); + + PDFObject* pObj = new PDFObject( nObject, nGeneration ); + pObj->m_nOffset = first - m_aGlobalBegin; + + PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); + if( pContainer && + ( dynamic_cast<PDFFile*>(pContainer) || + dynamic_cast<PDFPart*>(pContainer) ) ) + { + pContainer->m_aSubElements.emplace_back( pObj ); + m_aObjectStack.push_back( pObj ); + } + else + parseError( "object in wrong place", first ); + } + + void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + if( m_aObjectStack.empty() ) + parseError( "endobj without obj", first ); + else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == nullptr ) + parseError( "spurious endobj", first ); + else + m_aObjectStack.pop_back(); + } + + void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + unsigned int nGeneration = m_aUIntStack.back(); + m_aUIntStack.pop_back(); + unsigned int nObject = m_aUIntStack.back(); + m_aUIntStack.pop_back(); + insertNewValue( std::make_unique<PDFObjectRef>(nObject,nGeneration), first ); + } + + void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + PDFDict* pDict = new PDFDict(); + pDict->m_nOffset = first - m_aGlobalBegin; + + insertNewValue( std::unique_ptr<PDFEntry>(pDict), first ); + // will not come here if insertion fails (exception) + m_aObjectStack.push_back( pDict ); + } + void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + PDFDict* pDict = nullptr; + if( m_aObjectStack.empty() ) + parseError( "dictionary end without begin", first ); + else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == nullptr ) + parseError( "spurious dictionary end", first ); + else + m_aObjectStack.pop_back(); + + PDFEntry* pOffender = pDict->buildMap(); + if( pOffender ) + { + StringEmitContext aCtx; + aCtx.write( "offending dictionary element: ", 30 ); + pOffender->emit( aCtx ); + m_aErrorString = aCtx.getString(); + parseError( m_aErrorString.getStr(), first ); + } + } + + void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + PDFArray* pArray = new PDFArray(); + pArray->m_nOffset = first - m_aGlobalBegin; + + insertNewValue( std::unique_ptr<PDFEntry>(pArray), first ); + // will not come here if insertion fails (exception) + m_aObjectStack.push_back( pArray ); + } + + void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + if( m_aObjectStack.empty() ) + parseError( "array end without begin", first ); + else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == nullptr ) + parseError( "spurious array end", first ); + else + m_aObjectStack.pop_back(); + } + + void emitStream( iteratorT first, iteratorT last ) + { + if( m_aObjectStack.empty() ) + parseError( "stream without object", first ); + PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back()); + if( pObj && pObj->m_pObject ) + { + if( pObj->m_pStream ) + parseError( "multiple streams in object", first ); + + PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject); + if( pDict ) + { + PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict ); + + pObj->m_pStream = pStream; + pObj->m_aSubElements.emplace_back( pStream ); + } + } + else + parseError( "stream without object", first ); + } + + void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + if( m_aObjectStack.empty() ) + m_aObjectStack.push_back( new PDFPart() ); + + PDFTrailer* pTrailer = new PDFTrailer(); + pTrailer->m_nOffset = first - m_aGlobalBegin; + + PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); + if( pContainer && + ( dynamic_cast<PDFFile*>(pContainer) || + dynamic_cast<PDFPart*>(pContainer) ) ) + { + pContainer->m_aSubElements.emplace_back( pTrailer ); + m_aObjectStack.push_back( pTrailer ); + } + else + parseError( "trailer in wrong place", first ); + } + + void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) + { + if( m_aObjectStack.empty() ) + parseError( "%%EOF without trailer", first ); + else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == nullptr ) + parseError( "spurious %%EOF", first ); + else + m_aObjectStack.pop_back(); + } +}; + +} + +#ifdef _WIN32 +std::unique_ptr<PDFEntry> PDFReader::read( const char* pBuffer, unsigned int nLen ) +{ + PDFGrammar<const char*> aGrammar( pBuffer ); + + try + { +#if OSL_DEBUG_LEVEL > 0 + boost::spirit::classic::parse_info<const char*> aInfo = +#endif + boost::spirit::classic::parse( pBuffer, + pBuffer+nLen, + aGrammar, + boost::spirit::classic::space_p ); +#if OSL_DEBUG_LEVEL > 0 + SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << static_cast<int>(aInfo.length) ); +#endif + } + catch( const parser_error<const char*, const char*>& rError ) + { +#if OSL_DEBUG_LEVEL > 0 + OString aTmp; + unsigned int nElem = aGrammar.m_aObjectStack.size(); + for( unsigned int i = 0; i < nElem; i++ ) + aTmp += OStringLiteral(" ") + typeid( *(aGrammar.m_aObjectStack[i]) ).name(); + + SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp); +#else + (void)rError; +#endif + } + + std::unique_ptr<PDFEntry> pRet; + unsigned int nEntries = aGrammar.m_aObjectStack.size(); + if( nEntries == 1 ) + { + pRet.reset(aGrammar.m_aObjectStack.back()); + aGrammar.m_aObjectStack.pop_back(); + } +#if OSL_DEBUG_LEVEL > 0 + else if( nEntries > 1 ) + SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" ); +#endif + + return pRet; +} +#endif + +std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName ) +{ +#ifdef _WIN32 + /* #i106583# + since converting to boost 1.39 file_iterator does not work anymore on all Windows systems + C++ stdlib istream_iterator does not allow "-" apparently + using spirit 2.0 doesn't work in our environment with the MSC + + So for the time being bite the bullet and read the whole file. + FIXME: give Spirit 2.x another try when we upgrade boost again. + */ + std::unique_ptr<PDFEntry> pRet; + FILE* fp = fopen( pFileName, "rb" ); + if( fp ) + { + fseek( fp, 0, SEEK_END ); + unsigned int nLen = static_cast<unsigned int>(ftell( fp )); + fseek( fp, 0, SEEK_SET ); + char* pBuf = static_cast<char*>(std::malloc( nLen )); + if( pBuf ) + { + fread( pBuf, 1, nLen, fp ); + pRet = read( pBuf, nLen ); + std::free( pBuf ); + } + fclose( fp ); + } + return pRet; +#else + file_iterator<> file_start( pFileName ); + if( ! file_start ) + return nullptr; + file_iterator<> file_end = file_start.make_end(); + PDFGrammar< file_iterator<> > aGrammar( file_start ); + + try + { +#if OSL_DEBUG_LEVEL > 0 + boost::spirit::classic::parse_info< file_iterator<> > aInfo = +#endif + boost::spirit::classic::parse( file_start, + file_end, + aGrammar, + boost::spirit::classic::space_p ); +#if OSL_DEBUG_LEVEL > 0 + SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length); +#endif + } + catch( const parser_error< const char*, file_iterator<> >& rError ) + { + SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start); +#if OSL_DEBUG_LEVEL > 0 + OUStringBuffer aTmp; + unsigned int nElem = aGrammar.m_aObjectStack.size(); + for( unsigned int i = 0; i < nElem; i++ ) + { + aTmp.append(" "); + aTmp.appendAscii(typeid( *(aGrammar.m_aObjectStack[i]) ).name()); + } + SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp.makeStringAndClear()); +#endif + } + + std::unique_ptr<PDFEntry> pRet; + unsigned int nEntries = aGrammar.m_aObjectStack.size(); + if( nEntries == 1 ) + { + pRet.reset(aGrammar.m_aObjectStack.back()); + aGrammar.m_aObjectStack.pop_back(); + } +#if OSL_DEBUG_LEVEL > 0 + else if( nEntries > 1 ) + { + SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse"); + for( unsigned int i = 0; i < nEntries; i++ ) + { + SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name()); + PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]); + if( pObj ) + SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration); + else + SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")"); + } + } +#endif + return pRet; +#endif // WIN32 +} + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |