diff options
Diffstat (limited to 'sdext/source/pdfimport/wrapper')
-rw-r--r-- | sdext/source/pdfimport/wrapper/keyword_list | 53 | ||||
-rw-r--r-- | sdext/source/pdfimport/wrapper/wrapper.cxx | 1253 |
2 files changed, 1306 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/wrapper/keyword_list b/sdext/source/pdfimport/wrapper/keyword_list new file mode 100644 index 000000000..bf15ddb24 --- /dev/null +++ b/sdext/source/pdfimport/wrapper/keyword_list @@ -0,0 +1,53 @@ +struct hash_entry { const char* name; enum parseKey eKey; }; + +#if defined __GNUC__ +#pragma GCC system_header +#endif + +%% +clipPath,CLIPPATH +drawChar,DRAWCHAR +drawImage,DRAWIMAGE +drawLink,DRAWLINK +drawMask,DRAWMASK +drawMaskedImage,DRAWMASKEDIMAGE +drawSoftMaskedImage,DRAWSOFTMASKEDIMAGE +endPage,ENDPAGE +endTextObject,ENDTEXTOBJECT +eoClipPath,EOCLIPPATH +eoFillPath,EOFILLPATH +fillPath,FILLPATH +hyperLink,HYPERLINK +intersectClip,INTERSECTCLIP +intersectEoClip,INTERSECTEOCLIP +popState,POPSTATE +pushState,PUSHSTATE +restoreState,RESTORESTATE +saveState,SAVESTATE +setBlendMode,SETBLENDMODE +setFillColor,SETFILLCOLOR +setFont,SETFONT +setLineCap,SETLINECAP +setLineDash,SETLINEDASH +setLineJoin,SETLINEJOIN +setLineWidth,SETLINEWIDTH +setMiterLimit,SETMITERLIMIT +setPageNum,SETPAGENUM +setStrokeColor,SETSTROKECOLOR +setTextRenderMode,SETTEXTRENDERMODE +setTransformation,SETTRANSFORMATION +startPage,STARTPAGE +strokePath,STROKEPATH +updateBlendMode,UPDATEBLENDMODE +updateCtm,UPDATECTM +updateFillColor,UPDATEFILLCOLOR +updateFillOpacity,UPDATEFILLOPACITY +updateFlatness,UPDATEFLATNESS +updateFont,UPDATEFONT +updateLineCap,UPDATELINECAP +updateLineDash,UPDATELINEDASH +updateLineJoin,UPDATELINEJOIN +updateLineWidth,UPDATELINEWIDTH +updateMiterLimit,UPDATEMITERLIMIT +updateStrokeColor,UPDATESTROKECOLOR +updateStrokeOpacity,UPDATESTROKEOPACITY diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx b/sdext/source/pdfimport/wrapper/wrapper.cxx new file mode 100644 index 000000000..5d2a2c0b6 --- /dev/null +++ b/sdext/source/pdfimport/wrapper/wrapper.cxx @@ -0,0 +1,1253 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <config_folders.h> + +#include <contentsink.hxx> +#include <pdfparse.hxx> +#include <pdfihelper.hxx> +#include <wrapper.hxx> + +#include <o3tl/string_view.hxx> +#include <osl/file.h> +#include <osl/file.hxx> +#include <osl/thread.h> +#include <osl/process.h> +#include <osl/diagnose.h> +#include <rtl/bootstrap.hxx> +#include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> +#include <sal/log.hxx> + +#include <comphelper/propertysequence.hxx> +#include <comphelper/string.hxx> +#include <com/sun/star/io/XInputStream.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> +#include <com/sun/star/rendering/PathCapType.hpp> +#include <com/sun/star/rendering/PathJoinType.hpp> +#include <com/sun/star/rendering/XPolyPolygon2D.hpp> +#include <com/sun/star/geometry/Matrix2D.hpp> +#include <com/sun/star/geometry/AffineMatrix2D.hpp> +#include <com/sun/star/geometry/RealRectangle2D.hpp> +#include <com/sun/star/geometry/RealSize2D.hpp> +#include <com/sun/star/task/XInteractionHandler.hpp> + +#include <basegfx/point/b2dpoint.hxx> +#include <basegfx/polygon/b2dpolypolygon.hxx> +#include <basegfx/polygon/b2dpolygon.hxx> +#include <basegfx/utils/unopolypolygon.hxx> + +#include <vcl/metric.hxx> +#include <vcl/font.hxx> +#include <vcl/virdev.hxx> + +#include <cstddef> +#include <memory> +#include <string_view> +#include <unordered_map> +#include <string.h> + +using namespace com::sun::star; + +namespace pdfi +{ + +namespace +{ + +// identifier of the strings coming from the out-of-process xpdf +// converter +enum parseKey { + CLIPPATH, + DRAWCHAR, + DRAWIMAGE, + DRAWLINK, + DRAWMASK, + DRAWMASKEDIMAGE, + DRAWSOFTMASKEDIMAGE, + ENDPAGE, + ENDTEXTOBJECT, + EOCLIPPATH, + EOFILLPATH, + FILLPATH, + HYPERLINK, + INTERSECTCLIP, + INTERSECTEOCLIP, + POPSTATE, + PUSHSTATE, + RESTORESTATE, + SAVESTATE, + SETBLENDMODE, + SETFILLCOLOR, + SETFONT, + SETLINECAP, + SETLINEDASH, + SETLINEJOIN, + SETLINEWIDTH, + SETMITERLIMIT, + SETPAGENUM, + SETSTROKECOLOR, + SETTEXTRENDERMODE, + SETTRANSFORMATION, + STARTPAGE, + STROKEPATH, + UPDATEBLENDMODE, + UPDATECTM, + UPDATEFILLCOLOR, + UPDATEFILLOPACITY, + UPDATEFLATNESS, + UPDATEFONT, + UPDATELINECAP, + UPDATELINEDASH, + UPDATELINEJOIN, + UPDATELINEWIDTH, + UPDATEMITERLIMIT, + UPDATESTROKECOLOR, + UPDATESTROKEOPACITY, + NONE +}; + +#if defined _MSC_VER && defined __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-register" +#pragma clang diagnostic ignored "-Wextra-tokens" +#endif +#include <hash.cxx> +#if defined _MSC_VER && defined __clang__ +#pragma clang diagnostic pop +#endif + +class Parser +{ + friend class LineParser; + + typedef std::unordered_map< sal_Int64, + FontAttributes > FontMapType; + + ScopedVclPtr<VirtualDevice> m_xDev; + const uno::Reference<uno::XComponentContext> m_xContext; + const ContentSinkSharedPtr m_pSink; + const oslFileHandle m_pErr; + FontMapType m_aFontMap; + +public: + Parser( const ContentSinkSharedPtr& rSink, + oslFileHandle pErr, + const uno::Reference<uno::XComponentContext>& xContext ) : + m_xContext(xContext), + m_pSink(rSink), + m_pErr(pErr), + m_aFontMap(101) + {} + + void parseLine( const OString& rLine ); +}; + +class LineParser { + Parser & m_parser; + OString m_aLine; + + static void parseFontFamilyName( FontAttributes& aResult ); + void readInt32( sal_Int32& o_Value ); + void readInt64( sal_Int64& o_Value ); + void readDouble( double& o_Value ); + void readBinaryData( uno::Sequence<sal_Int8>& rBuf ); + + uno::Sequence<beans::PropertyValue> readImageImpl(); + +public: + std::size_t m_nCharIndex = 0; + + LineParser(Parser & parser, OString const & line): m_parser(parser), m_aLine(line) {} + + std::string_view readNextToken(); + sal_Int32 readInt32(); + double readDouble(); + + uno::Reference<rendering::XPolyPolygon2D> readPath(); + + void readChar(); + void readLineCap(); + void readLineDash(); + void readLineJoin(); + void readTransformation(); + rendering::ARGBColor readColor(); + void readFont(); + + void readImage(); + void readMask(); + void readLink(); + void readMaskedImage(); + void readSoftMaskedImage(); +}; + +/** Unescapes line-ending characters in input string. These + characters are encoded as pairs of characters: '\\' 'n', resp. + '\\' 'r'. This function converts them back to '\n', resp. '\r'. + */ +OString lcl_unescapeLineFeeds(std::string_view i_rStr) +{ + const size_t nOrigLen(i_rStr.size()); + const char* const pOrig(i_rStr.data()); + std::unique_ptr<char[]> pBuffer(new char[nOrigLen + 1]); + + const char* pRead(pOrig); + char* pWrite(pBuffer.get()); + const char* pCur(pOrig); + while ((pCur = strchr(pCur, '\\')) != nullptr) + { + const char cNext(pCur[1]); + if (cNext == 'n' || cNext == 'r' || cNext == '\\') + { + const size_t nLen(pCur - pRead); + strncpy(pWrite, pRead, nLen); + pWrite += nLen; + *pWrite = cNext == 'n' ? '\n' : (cNext == 'r' ? '\r' : '\\'); + ++pWrite; + pCur = pRead = pCur + 2; + } + else + { + // Just continue on the next character. The current + // block will be copied the next time it goes through the + // 'if' branch. + ++pCur; + } + } + // maybe there are some data to copy yet + if (sal::static_int_cast<size_t>(pRead - pOrig) < nOrigLen) + { + const size_t nLen(nOrigLen - (pRead - pOrig)); + strncpy(pWrite, pRead, nLen); + pWrite += nLen; + } + *pWrite = '\0'; + + OString aResult(pBuffer.get()); + return aResult; +} + +std::string_view LineParser::readNextToken() +{ + if (m_nCharIndex == std::string_view::npos) { + SAL_WARN("sdext.pdfimport", "insufficient input"); + return {}; + } + return o3tl::getToken(m_aLine,' ',m_nCharIndex); +} + +void LineParser::readInt32( sal_Int32& o_Value ) +{ + std::string_view tok = readNextToken(); + o_Value = o3tl::toInt32(tok); +} + +sal_Int32 LineParser::readInt32() +{ + std::string_view tok = readNextToken(); + return o3tl::toInt32(tok); +} + +void LineParser::readInt64( sal_Int64& o_Value ) +{ + std::string_view tok = readNextToken(); + o_Value = o3tl::toInt64(tok); +} + +void LineParser::readDouble( double& o_Value ) +{ + std::string_view tok = readNextToken(); + o_Value = rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0, + nullptr, nullptr); +} + +double LineParser::readDouble() +{ + std::string_view tok = readNextToken(); + return rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0, + nullptr, nullptr); +} + +void LineParser::readBinaryData( uno::Sequence<sal_Int8>& rBuf ) +{ + sal_Int32 nFileLen( rBuf.getLength() ); + sal_Int8* pBuf( rBuf.getArray() ); + sal_uInt64 nBytesRead(0); + oslFileError nRes=osl_File_E_None; + while( nFileLen ) + { + nRes = osl_readFile( m_parser.m_pErr, pBuf, nFileLen, &nBytesRead ); + if (osl_File_E_None != nRes ) + break; + pBuf += nBytesRead; + nFileLen -= sal::static_int_cast<sal_Int32>(nBytesRead); + } + + OSL_PRECOND(nRes==osl_File_E_None, "inconsistent data"); +} + +uno::Reference<rendering::XPolyPolygon2D> LineParser::readPath() +{ + static const std::string_view aSubPathMarker( "subpath" ); + + if( readNextToken() != aSubPathMarker ) + OSL_PRECOND(false, "broken path"); + + basegfx::B2DPolyPolygon aResult; + while( m_nCharIndex != std::string_view::npos ) + { + basegfx::B2DPolygon aSubPath; + + sal_Int32 nClosedFlag; + readInt32( nClosedFlag ); + aSubPath.setClosed( nClosedFlag != 0 ); + + sal_Int32 nContiguousControlPoints(0); + + while( m_nCharIndex != std::string_view::npos ) + { + std::size_t nDummy=m_nCharIndex; + if (o3tl::getToken(m_aLine,' ',nDummy) == aSubPathMarker) { + break; + } + + sal_Int32 nCurveFlag; + double nX, nY; + readDouble( nX ); + readDouble( nY ); + readInt32( nCurveFlag ); + + aSubPath.append(basegfx::B2DPoint(nX,nY)); + if( nCurveFlag ) + { + ++nContiguousControlPoints; + } + else if( nContiguousControlPoints ) + { + OSL_PRECOND(nContiguousControlPoints==2,"broken bezier path"); + + // have two control points before us. the current one + // is a normal point - thus, convert previous points + // into bezier segment + const sal_uInt32 nPoints( aSubPath.count() ); + const basegfx::B2DPoint aCtrlA( aSubPath.getB2DPoint(nPoints-3) ); + const basegfx::B2DPoint aCtrlB( aSubPath.getB2DPoint(nPoints-2) ); + const basegfx::B2DPoint aEnd( aSubPath.getB2DPoint(nPoints-1) ); + aSubPath.remove(nPoints-3, 3); + aSubPath.appendBezierSegment(aCtrlA, aCtrlB, aEnd); + + nContiguousControlPoints=0; + } + } + + aResult.append( aSubPath ); + if( m_nCharIndex != std::string_view::npos ) + readNextToken(); + } + + return static_cast<rendering::XLinePolyPolygon2D*>( + new basegfx::unotools::UnoPolyPolygon(std::move(aResult))); +} + +void LineParser::readChar() +{ + double fontSize; + geometry::Matrix2D aUnoMatrix; + geometry::RealRectangle2D aRect; + + readDouble(aRect.X1); + readDouble(aRect.Y1); + readDouble(aRect.X2); + readDouble(aRect.Y2); + readDouble(aUnoMatrix.m00); + readDouble(aUnoMatrix.m01); + readDouble(aUnoMatrix.m10); + readDouble(aUnoMatrix.m11); + readDouble(fontSize); + + OString aChars; + + if (m_nCharIndex != std::string_view::npos) + aChars = lcl_unescapeLineFeeds( m_aLine.subView( m_nCharIndex ) ); + + // chars gobble up rest of line + m_nCharIndex = std::string_view::npos; + + m_parser.m_pSink->drawGlyphs(OStringToOUString(aChars, RTL_TEXTENCODING_UTF8), + aRect, aUnoMatrix, fontSize); +} + +void LineParser::readLineCap() +{ + sal_Int8 nCap(rendering::PathCapType::BUTT); + switch( readInt32() ) + { + default: + case 0: nCap = rendering::PathCapType::BUTT; break; + case 1: nCap = rendering::PathCapType::ROUND; break; + case 2: nCap = rendering::PathCapType::SQUARE; break; + } + m_parser.m_pSink->setLineCap(nCap); +} + +void LineParser::readLineDash() +{ + if( m_nCharIndex == std::string_view::npos ) + { + m_parser.m_pSink->setLineDash( uno::Sequence<double>(), 0.0 ); + return; + } + + const double nOffset(readDouble()); + const sal_Int32 nLen(readInt32()); + + uno::Sequence<double> aDashArray(nLen); + double* pArray=aDashArray.getArray(); + for( sal_Int32 i=0; i<nLen; ++i ) + *pArray++ = readDouble(); + + m_parser.m_pSink->setLineDash( aDashArray, nOffset ); +} + +void LineParser::readLineJoin() +{ + sal_Int8 nJoin(rendering::PathJoinType::MITER); + switch( readInt32() ) + { + default: + case 0: nJoin = rendering::PathJoinType::MITER; break; + case 1: nJoin = rendering::PathJoinType::ROUND; break; + case 2: nJoin = rendering::PathJoinType::BEVEL; break; + } + m_parser.m_pSink->setLineJoin(nJoin); +} + +void LineParser::readTransformation() +{ + geometry::AffineMatrix2D aMat; + readDouble(aMat.m00); + readDouble(aMat.m10); + readDouble(aMat.m01); + readDouble(aMat.m11); + readDouble(aMat.m02); + readDouble(aMat.m12); + m_parser.m_pSink->setTransformation( aMat ); +} + +rendering::ARGBColor LineParser::readColor() +{ + rendering::ARGBColor aRes; + readDouble(aRes.Red); + readDouble(aRes.Green); + readDouble(aRes.Blue); + readDouble(aRes.Alpha); + return aRes; +} + +/* Parse and convert the font family name (passed from xpdfimport) to correct font names +e.g. TimesNewRomanPSMT -> TimesNewRoman + TimesNewRomanPS-BoldMT -> TimesNewRoman + TimesNewRomanPS-BoldItalicMT -> TimesNewRoman +During the conversion, also apply the font features (bold italic etc) to the result. + +TODO: Further convert the font names to real font names in the system rather than the PS names. +e.g., TimesNewRoman -> Times New Roman +*/ +void LineParser::parseFontFamilyName( FontAttributes& rResult ) +{ + SAL_INFO("sdext.pdfimport", "Processing " << rResult.familyName << " ---"); + rResult.familyName = rResult.familyName.trim(); + for (const OUString& fontAttributesSuffix: fontAttributesSuffixes) + { + if ( rResult.familyName.endsWith(fontAttributesSuffix) ) + { + rResult.familyName = rResult.familyName.replaceAll(fontAttributesSuffix, ""); + SAL_INFO("sdext.pdfimport", rResult.familyName); + if (fontAttributesSuffix == u"Heavy" || fontAttributesSuffix == u"Black") + { + rResult.fontWeight = u"900"; + } + else if (fontAttributesSuffix == u"ExtraBold" || fontAttributesSuffix == u"UltraBold") + { + rResult.fontWeight = u"800"; + } + else if (fontAttributesSuffix == u"Bold") + { + rResult.fontWeight = u"bold"; + } + else if (fontAttributesSuffix == u"Semibold") + { + rResult.fontWeight = u"600"; + } + else if (fontAttributesSuffix == u"Medium") + { + rResult.fontWeight = u"500"; + } + else if (fontAttributesSuffix == u"Normal" || fontAttributesSuffix == u"Regular" || fontAttributesSuffix == u"Book") + { + rResult.fontWeight = u"400"; + } + else if (fontAttributesSuffix == u"Light") + { + rResult.fontWeight = u"300"; + } + else if (fontAttributesSuffix == u"ExtraLight" || fontAttributesSuffix == u"UltraLight") + { + rResult.fontWeight = u"200"; + } + else if (fontAttributesSuffix == u"Thin") + { + rResult.fontWeight = u"100"; + } + + if ( (fontAttributesSuffix == "Italic") or (fontAttributesSuffix == "Oblique") ) + { + rResult.isItalic = true; + } + } + } +} + +void LineParser::readFont() +{ + /* + xpdf line is like (separated by space): + updateFont <FontID> <isEmbedded> <maFontWeight> <isItalic> <isUnderline> <TransformedFontSize> <nEmbedSize> <FontName> + updateFont 14 1 4 0 0 1200.000000 23068 TimesNewRomanPSMT + + If nEmbedSize > 0, then a fontFile is followed as a stream. + */ + sal_Int64 nFontID; + sal_Int32 nIsEmbedded; + sal_Int32 nFontWeight; + sal_Int32 nIsItalic; + sal_Int32 nIsUnderline; + double nSize; + sal_Int32 nFileLen; + OString aFontName; + + readInt64(nFontID); // read FontID + readInt32(nIsEmbedded); // read isEmbedded + readInt32(nFontWeight); // read maFontWeight, see GfxFont enum Weight + readInt32(nIsItalic); // read isItalic + readInt32(nIsUnderline);// read isUnderline + readDouble(nSize); // read TransformedFontSize + readInt32(nFileLen); // read nEmbedSize + + nSize = nSize < 0.0 ? -nSize : nSize; + // Read FontName. From the current position to the end (any white spaces will be included). + aFontName = lcl_unescapeLineFeeds(m_aLine.subView(m_nCharIndex)); + + // name gobbles up rest of line + m_nCharIndex = std::string_view::npos; + + // Check if this font is already in our font map list. + // If yes, update the font size and skip. + Parser::FontMapType::const_iterator pFont( m_parser.m_aFontMap.find(nFontID) ); + if( pFont != m_parser.m_aFontMap.end() ) + { + OSL_PRECOND(nFileLen==0,"font data for known font"); + FontAttributes aRes(pFont->second); + aRes.size = nSize; + m_parser.m_pSink->setFont( aRes ); + + return; + } + + // The font is not yet in the map list - get info and add to map + OUString sFontWeight; // font weight name per ODF specifications + if (nFontWeight == 0 or nFontWeight == 4) // WeightNotDefined or W400, map to normal font + sFontWeight = u"normal"; + else if (nFontWeight == 1) // W100, Thin + sFontWeight = u"100"; + else if (nFontWeight == 2) // W200, Extra-Light + sFontWeight = u"200"; + else if (nFontWeight == 3) // W300, Light + sFontWeight = u"300"; + else if (nFontWeight == 5) // W500, Medium. Is this supported by ODF? + sFontWeight = u"500"; + else if (nFontWeight == 6) // W600, Semi-Bold + sFontWeight = u"600"; + else if (nFontWeight == 7) // W700, Bold + sFontWeight = u"bold"; + else if (nFontWeight == 8) // W800, Extra-Bold + sFontWeight = u"800"; + else if (nFontWeight == 9) // W900, Black + sFontWeight = u"900"; + SAL_INFO("sdext.pdfimport", "Font weight passed from xpdfimport is: " << sFontWeight); + + FontAttributes aResult( OStringToOUString( aFontName, RTL_TEXTENCODING_UTF8 ), + sFontWeight, + nIsItalic != 0, + nIsUnderline != 0, + nSize, + 1.0); + + /* The above font attributes (fontName, fontWeight, italic) are based on + xpdf line output and may not be reliable. To get correct attributes, + we do the following: + 1. Read the embedded font file and determine the attributes based on the + font file. + 2. If we failed to read the font file, or empty result is returned, then + determine the font attributes from the font name. + 3. If all these attempts have failed, then use a fallback font. + */ + if (nFileLen > 0) + { + uno::Sequence<sal_Int8> aFontFile(nFileLen); + readBinaryData(aFontFile); // Read fontFile. + + vcl::Font aFontReadResult = vcl::Font::identifyFont(aFontFile.getArray(), nFileLen); + SAL_INFO("sdext.pdfimport", "familyName: " << aFontReadResult.GetFamilyName()); + + if (!aFontReadResult.GetFamilyName().isEmpty()) // font detection successful + { + // Family name + aResult.familyName = aFontReadResult.GetFamilyName(); + SAL_INFO("sdext.pdfimport", aResult.familyName); + // tdf#143959: there are cases when the family name returned by font descriptor + // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name + // determined by parseFontFamilyName instead, but still determine the font + // attributes (bold italic etc) from the font descriptor. + if (aResult.familyName.getLength() > 7 and aResult.familyName.indexOf(u"+", 6) == 6) + { + aResult.familyName = aResult.familyName.copy(7, aResult.familyName.getLength() - 7); + parseFontFamilyName(aResult); + } + if (aResult.familyName.endsWithIgnoreAsciiCase("-VKana")) + { + parseFontFamilyName(aResult); + } + + // Font weight + if (aFontReadResult.GetWeight() == WEIGHT_THIN) + aResult.fontWeight = u"100"; + else if (aFontReadResult.GetWeight() == WEIGHT_ULTRALIGHT) + aResult.fontWeight = u"200"; + else if (aFontReadResult.GetWeight() == WEIGHT_LIGHT) + aResult.fontWeight = u"300"; + else if (aFontReadResult.GetWeight() == WEIGHT_SEMILIGHT) + aResult.fontWeight = u"350"; + // no need to check "normal" here as this is default in nFontWeight above + else if (aFontReadResult.GetWeight() == WEIGHT_SEMIBOLD) + aResult.fontWeight = u"600"; + else if (aFontReadResult.GetWeight() == WEIGHT_BOLD) + aResult.fontWeight = u"bold"; + else if (aFontReadResult.GetWeight() == WEIGHT_ULTRABOLD) + aResult.fontWeight = u"800"; + else if (aFontReadResult.GetWeight() == WEIGHT_BLACK) + aResult.fontWeight = u"900"; + SAL_INFO("sdext.pdfimport", aResult.fontWeight); + + // Italic + aResult.isItalic = (aFontReadResult.GetItalic() == ITALIC_OBLIQUE || + aFontReadResult.GetItalic() == ITALIC_NORMAL); + } else // font detection failed + { + SAL_WARN("sdext.pdfimport", + "Font detection from fontFile returned empty result. Guessing font info from font name."); + parseFontFamilyName(aResult); + } + + } else // no embedded font file - guess font attributes from font name + { + parseFontFamilyName(aResult); + } + + // last fallback + if (aResult.familyName.isEmpty()) + { + SAL_WARN("sdext.pdfimport", "Failed to determine the font, using a fallback font Arial."); + aResult.familyName = "Arial"; + } + + if (!m_parser.m_xDev) + m_parser.m_xDev.disposeAndReset(VclPtr<VirtualDevice>::Create()); + + vcl::Font font(aResult.familyName, Size(0, 1000)); + m_parser.m_xDev->SetFont(font); + FontMetric metric(m_parser.m_xDev->GetFontMetric()); + aResult.ascent = metric.GetAscent() / 1000.0; + + m_parser.m_aFontMap[nFontID] = aResult; + + aResult.size = nSize; + m_parser.m_pSink->setFont(aResult); +} + +uno::Sequence<beans::PropertyValue> LineParser::readImageImpl() +{ + std::string_view aToken = readNextToken(); + const sal_Int32 nImageSize( readInt32() ); + + OUString aFileName; + if( aToken == "PNG" ) + aFileName = "DUMMY.PNG"; + else if( aToken == "JPEG" ) + aFileName = "DUMMY.JPEG"; + else if( aToken == "PBM" ) + aFileName = "DUMMY.PBM"; + else + { + SAL_WARN_IF(aToken != "PPM","sdext.pdfimport","Invalid bitmap format"); + aFileName = "DUMMY.PPM"; + } + + uno::Sequence<sal_Int8> aDataSequence(nImageSize); + readBinaryData( aDataSequence ); + + uno::Sequence< uno::Any > aStreamCreationArgs{ uno::Any(aDataSequence) }; + + uno::Reference< uno::XComponentContext > xContext( m_parser.m_xContext, uno::UNO_SET_THROW ); + uno::Reference< lang::XMultiComponentFactory > xFactory( xContext->getServiceManager(), uno::UNO_SET_THROW ); + uno::Reference< io::XInputStream > xDataStream( + xFactory->createInstanceWithArgumentsAndContext( "com.sun.star.io.SequenceInputStream", aStreamCreationArgs, m_parser.m_xContext ), + uno::UNO_QUERY_THROW ); + + uno::Sequence<beans::PropertyValue> aSequence( comphelper::InitPropertySequence({ + { "URL", uno::Any(aFileName) }, + { "InputStream", uno::Any( xDataStream ) }, + { "InputSequence", uno::Any(aDataSequence) } + })); + + return aSequence; +} + +void LineParser::readImage() +{ + sal_Int32 nWidth, nHeight,nMaskColors; + readInt32(nWidth); + readInt32(nHeight); + readInt32(nMaskColors); + + uno::Sequence<beans::PropertyValue> aImg( readImageImpl() ); + + if( nMaskColors ) + { + uno::Sequence<sal_Int8> aDataSequence(nMaskColors); + readBinaryData( aDataSequence ); + + uno::Sequence<double> aMinRange(nMaskColors/2); + auto pMinRange = aMinRange.getArray(); + uno::Sequence<double> aMaxRange(nMaskColors/2); + auto pMaxRange = aMaxRange.getArray(); + for( sal_Int32 i=0; i<nMaskColors/2; ++i ) + { + pMinRange[i] = aDataSequence[i] / 255.0; + pMaxRange[i] = aDataSequence[i+nMaskColors/2] / 255.0; + } + + uno::Sequence<uno::Any> aMaskRanges{ uno::Any(aMinRange), uno::Any(aMaxRange) }; + m_parser.m_pSink->drawColorMaskedImage( aImg, aMaskRanges ); + } + else + m_parser.m_pSink->drawImage( aImg ); +} + +void LineParser::readMask() +{ + sal_Int32 nWidth, nHeight, nInvert; + readInt32(nWidth); + readInt32(nHeight); + readInt32(nInvert); + + m_parser.m_pSink->drawMask( readImageImpl(), nInvert != 0); +} + +void LineParser::readLink() +{ + geometry::RealRectangle2D aBounds; + readDouble(aBounds.X1); + readDouble(aBounds.Y1); + readDouble(aBounds.X2); + readDouble(aBounds.Y2); + + m_parser.m_pSink->hyperLink( aBounds, + OStringToOUString( lcl_unescapeLineFeeds( + m_aLine.subView(m_nCharIndex) ), + RTL_TEXTENCODING_UTF8 ) ); + // name gobbles up rest of line + m_nCharIndex = std::string_view::npos; +} + +void LineParser::readMaskedImage() +{ + sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight, nMaskInvert; + readInt32(nWidth); + readInt32(nHeight); + readInt32(nMaskWidth); + readInt32(nMaskHeight); + readInt32(nMaskInvert); + + const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() ); + const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() ); + m_parser.m_pSink->drawMaskedImage( aImage, aMask, nMaskInvert != 0 ); +} + +void LineParser::readSoftMaskedImage() +{ + sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight; + readInt32(nWidth); + readInt32(nHeight); + readInt32(nMaskWidth); + readInt32(nMaskHeight); + + const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() ); + const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() ); + m_parser.m_pSink->drawAlphaMaskedImage( aImage, aMask ); +} + +void Parser::parseLine( const OString& rLine ) +{ + OSL_PRECOND( m_pSink, "Invalid sink" ); + OSL_PRECOND( m_pErr, "Invalid filehandle" ); + OSL_PRECOND( m_xContext.is(), "Invalid service factory" ); + + LineParser lp(*this, rLine); + const std::string_view rCmd = lp.readNextToken(); + const hash_entry* pEntry = PdfKeywordHash::in_word_set( rCmd.data(), + rCmd.size() ); + OSL_ASSERT(pEntry); + switch( pEntry->eKey ) + { + case CLIPPATH: + m_pSink->intersectClip(lp.readPath()); break; + case DRAWCHAR: + lp.readChar(); break; + case DRAWIMAGE: + lp.readImage(); break; + case DRAWLINK: + lp.readLink(); break; + case DRAWMASK: + lp.readMask(); break; + case DRAWMASKEDIMAGE: + lp.readMaskedImage(); break; + case DRAWSOFTMASKEDIMAGE: + lp.readSoftMaskedImage(); break; + case ENDPAGE: + m_pSink->endPage(); break; + case ENDTEXTOBJECT: + m_pSink->endText(); break; + case EOCLIPPATH: + m_pSink->intersectEoClip(lp.readPath()); break; + case EOFILLPATH: + m_pSink->eoFillPath(lp.readPath()); break; + case FILLPATH: + m_pSink->fillPath(lp.readPath()); break; + case RESTORESTATE: + m_pSink->popState(); break; + case SAVESTATE: + m_pSink->pushState(); break; + case SETPAGENUM: + m_pSink->setPageNum( lp.readInt32() ); break; + case STARTPAGE: + { + const double nWidth ( lp.readDouble() ); + const double nHeight( lp.readDouble() ); + m_pSink->startPage( geometry::RealSize2D( nWidth, nHeight ) ); + break; + } + case STROKEPATH: + m_pSink->strokePath(lp.readPath()); break; + case UPDATECTM: + lp.readTransformation(); break; + case UPDATEFILLCOLOR: + m_pSink->setFillColor( lp.readColor() ); break; + case UPDATEFLATNESS: + m_pSink->setFlatness( lp.readDouble( ) ); break; + case UPDATEFONT: + lp.readFont(); break; + case UPDATELINECAP: + lp.readLineCap(); break; + case UPDATELINEDASH: + lp.readLineDash(); break; + case UPDATELINEJOIN: + lp.readLineJoin(); break; + case UPDATELINEWIDTH: + m_pSink->setLineWidth( lp.readDouble() );break; + case UPDATEMITERLIMIT: + m_pSink->setMiterLimit( lp.readDouble() ); break; + case UPDATESTROKECOLOR: + m_pSink->setStrokeColor( lp.readColor() ); break; + case UPDATESTROKEOPACITY: + break; + case SETTEXTRENDERMODE: + m_pSink->setTextRenderMode( lp.readInt32() ); break; + + case NONE: + default: + OSL_PRECOND(false,"Unknown input"); + break; + } + + // all consumed? + SAL_WARN_IF( + lp.m_nCharIndex!=std::string_view::npos, "sdext.pdfimport", "leftover scanner input"); +} + +} // namespace + +static bool checkEncryption( std::u16string_view i_rPath, + const uno::Reference< task::XInteractionHandler >& i_xIHdl, + OUString& io_rPwd, + bool& o_rIsEncrypted, + const OUString& i_rDocName + ) +{ + bool bSuccess = false; + OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() ); + + std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() )); + if( pEntry ) + { + pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get()); + if( pPDFFile ) + { + o_rIsEncrypted = pPDFFile->isEncrypted(); + if( o_rIsEncrypted ) + { + if( pPDFFile->usesSupportedEncryptionFormat() ) + { + bool bAuthenticated = false; + if( !io_rPwd.isEmpty() ) + { + OString aIsoPwd = OUStringToOString( io_rPwd, + RTL_TEXTENCODING_ISO_8859_1 ); + bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd.getStr() ); + } + if( bAuthenticated ) + bSuccess = true; + else + { + if( i_xIHdl.is() ) + { + bool bEntered = false; + do + { + bEntered = getPassword( i_xIHdl, io_rPwd, ! bEntered, i_rDocName ); + OString aIsoPwd = OUStringToOString( io_rPwd, + RTL_TEXTENCODING_ISO_8859_1 ); + bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd.getStr() ); + } while( bEntered && ! bAuthenticated ); + } + + bSuccess = bAuthenticated; + } + } + else if( i_xIHdl.is() ) + { + reportUnsupportedEncryptionFormat( i_xIHdl ); + //TODO: this should either be handled further down the + // call stack, or else information that this has already + // been handled should be passed down the call stack, so + // that SfxBaseModel::load does not show an additional + // "General Error" message box + } + } + else + bSuccess = true; + } + } + return bSuccess; +} + +namespace { + +class Buffering +{ + static const int SIZE = 64*1024; + std::unique_ptr<char[]> aBuffer; + oslFileHandle& pOut; + size_t pos; + sal_uInt64 left; + +public: + explicit Buffering(oslFileHandle& out) : aBuffer(new char[SIZE]), pOut(out), pos(0), left(0) {} + + oslFileError read(char *pChar, short count, sal_uInt64* pBytesRead) + { + oslFileError nRes = osl_File_E_None; + sal_uInt64 nBytesRead = 0; + while (count > 0) + { + if (left == 0) + { + nRes = osl_readFile(pOut, aBuffer.get(), SIZE, &left); + if (nRes != osl_File_E_None || left == 0) + { + *pBytesRead = nBytesRead; + return nRes; + } + pos = 0; + } + *pChar = aBuffer.get()[pos]; + --count; + ++pos; + --left; + ++pChar; + ++nBytesRead; + } + *pBytesRead = nBytesRead; + return osl_File_E_None; + } +}; + +} + +bool xpdf_ImportFromFile(const OUString& rURL, + const ContentSinkSharedPtr& rSink, + const uno::Reference<task::XInteractionHandler>& xIHdl, + const OUString& rPwd, + const uno::Reference<uno::XComponentContext>& xContext, + const OUString& rFilterOptions) +{ + OSL_ASSERT(rSink); + + OUString aSysUPath; + if( osl_getSystemPathFromFileURL( rURL.pData, &aSysUPath.pData ) != osl_File_E_None ) + { + SAL_WARN( + "sdext.pdfimport", + "getSystemPathFromFileURL(" << rURL << ") failed"); + return false; + } + OUString aDocName( rURL.copy( rURL.lastIndexOf( '/' )+1 ) ); + + // check for encryption, if necessary get password + OUString aPwd( rPwd ); + bool bIsEncrypted = false; + if( !checkEncryption( aSysUPath, xIHdl, aPwd, bIsEncrypted, aDocName ) ) + { + SAL_INFO( + "sdext.pdfimport", + "checkEncryption(" << aSysUPath << ") failed"); + return false; + } + + // Determine xpdfimport executable URL: + OUString converterURL("$BRAND_BASE_DIR/" LIBO_BIN_FOLDER "/xpdfimport"); + rtl::Bootstrap::expandMacros(converterURL); //TODO: detect failure + + // Determine pathname of xpdfimport_err.pdf: + OUString errPathname("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/xpdfimport/xpdfimport_err.pdf"); + rtl::Bootstrap::expandMacros(errPathname); //TODO: detect failure + if (osl::FileBase::getSystemPathFromFileURL(errPathname, errPathname) + != osl::FileBase::E_None) + { + SAL_WARN( + "sdext.pdfimport", + "getSystemPathFromFileURL(" << errPathname << ") failed"); + return false; + } + + // spawn separate process to keep LGPL/GPL code apart. + + OUString aOptFlag("-o"); + rtl_uString* args[] = { aSysUPath.pData, errPathname.pData, + aOptFlag.pData, rFilterOptions.pData }; + sal_Int32 nArgs = rFilterOptions.isEmpty() ? 2 : 4; + + oslProcess aProcess; + oslFileHandle pIn = nullptr; + oslFileHandle pOut = nullptr; + oslFileHandle pErr = nullptr; + oslSecurity pSecurity = osl_getCurrentSecurity (); + oslProcessError eErr = + osl_executeProcess_WithRedirectedIO(converterURL.pData, + args, + nArgs, + osl_Process_SEARCHPATH|osl_Process_HIDDEN, + pSecurity, + nullptr, nullptr, 0, + &aProcess, &pIn, &pOut, &pErr); + osl_freeSecurityHandle(pSecurity); + + bool bRet=true; + try + { + if( eErr!=osl_Process_E_None ) + { + SAL_WARN( + "sdext.pdfimport", + "executeProcess of " << converterURL << " failed with " + << +eErr); + return false; + } + + if( pIn ) + { + OStringBuffer aBuf(256); + if( bIsEncrypted ) + aBuf.append( OUStringToOString( aPwd, RTL_TEXTENCODING_ISO_8859_1 ) ); + aBuf.append( '\n' ); + + sal_uInt64 nWritten = 0; + osl_writeFile( pIn, aBuf.getStr(), sal_uInt64(aBuf.getLength()), &nWritten ); + } + + if( pOut && pErr ) + { + // read results of PDF parser. One line - one call to + // OutputDev. stderr is used for alternate streams, like + // embedded fonts and bitmaps + Parser aParser(rSink,pErr,xContext); + Buffering aBuffering(pOut); + OStringBuffer line; + for( ;; ) + { + char aChar('\n'); + sal_uInt64 nBytesRead; + oslFileError nRes; + + // skip garbage \r \n at start of line + for (;;) + { + nRes = aBuffering.read(&aChar, 1, &nBytesRead); + if (osl_File_E_None != nRes || nBytesRead != 1 || (aChar != '\n' && aChar != '\r') ) + break; + } + if ( osl_File_E_None != nRes ) + break; + + if( aChar != '\n' && aChar != '\r' ) + line.append( aChar ); + + for (;;) + { + nRes = aBuffering.read(&aChar, 1, &nBytesRead); + if ( osl_File_E_None != nRes || nBytesRead != 1 || aChar == '\n' || aChar == '\r' ) + break; + line.append( aChar ); + } + if ( osl_File_E_None != nRes ) + break; + if ( line.isEmpty() ) + break; + + aParser.parseLine(line.makeStringAndClear()); + } + } + } + catch( uno::Exception& ) + { + // crappy C file interface. need manual resource dealloc + bRet = false; + } + + if( pIn ) + osl_closeFile(pIn); + if( pOut ) + osl_closeFile(pOut); + if( pErr ) + osl_closeFile(pErr); + eErr = osl_joinProcess(aProcess); + if (eErr == osl_Process_E_None) + { + oslProcessInfo info; + info.Size = sizeof info; + eErr = osl_getProcessInfo(aProcess, osl_Process_EXITCODE, &info); + if (eErr == osl_Process_E_None) + { + if (info.Code != 0) + { + SAL_WARN( + "sdext.pdfimport", + "getProcessInfo of " << converterURL + << " failed with exit code " << info.Code); + bRet = false; + } + } + else + { + SAL_WARN( + "sdext.pdfimport", + "getProcessInfo of " << converterURL << " failed with " + << +eErr); + bRet = false; + } + } + else + { + SAL_WARN( + "sdext.pdfimport", + "joinProcess of " << converterURL << " failed with " << +eErr); + bRet = false; + } + osl_freeProcessHandle(aProcess); + return bRet; +} + + +bool xpdf_ImportFromStream( const uno::Reference< io::XInputStream >& xInput, + const ContentSinkSharedPtr& rSink, + const uno::Reference<task::XInteractionHandler >& xIHdl, + const OUString& rPwd, + const uno::Reference< uno::XComponentContext >& xContext, + const OUString& rFilterOptions ) +{ + OSL_ASSERT(xInput.is()); + OSL_ASSERT(rSink); + + // convert XInputStream to local temp file + oslFileHandle aFile = nullptr; + OUString aURL; + if( osl_createTempFile( nullptr, &aFile, &aURL.pData ) != osl_File_E_None ) + return false; + + // copy content, buffered... + const sal_uInt32 nBufSize = 4096; + uno::Sequence<sal_Int8> aBuf( nBufSize ); + sal_uInt64 nBytes = 0; + sal_uInt64 nWritten = 0; + bool bSuccess = true; + do + { + try + { + nBytes = xInput->readBytes( aBuf, nBufSize ); + } + catch( css::uno::Exception& ) + { + osl_closeFile( aFile ); + throw; + } + if( nBytes > 0 ) + { + osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten ); + if( nWritten != nBytes ) + { + bSuccess = false; + break; + } + } + } + while( nBytes == nBufSize ); + + osl_closeFile( aFile ); + + if ( bSuccess ) + bSuccess = xpdf_ImportFromFile( aURL, rSink, xIHdl, rPwd, xContext, rFilterOptions ); + osl_removeFile( aURL.pData ); + + return bSuccess; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |