diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
commit | ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch) | |
tree | 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /sdext/source/pdfimport/tree | |
parent | Initial commit. (diff) | |
download | libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip |
Adding upstream version 4:7.4.7. (refs: upstream/4%7.4.7, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | sdext/source/pdfimport/tree/drawtreevisiting.cxx | 1104 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/drawtreevisiting.hxx | 117 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/genericelements.cxx | 434 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/imagecontainer.cxx | 146 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/pdfiprocessor.cxx | 715 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/style.cxx | 248 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/style.hxx | 166 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/treevisitorfactory.cxx | 111 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/writertreevisiting.cxx | 1299 | ||||
-rw-r--r-- | sdext/source/pdfimport/tree/writertreevisiting.hxx | 110 |
10 files changed, 4450 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/tree/drawtreevisiting.cxx b/sdext/source/pdfimport/tree/drawtreevisiting.cxx new file mode 100644 index 000000000..95e2ce985 --- /dev/null +++ b/sdext/source/pdfimport/tree/drawtreevisiting.cxx @@ -0,0 +1,1104 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ +#include <sal/log.hxx> +#include <pdfiprocessor.hxx> +#include <xmlemitter.hxx> +#include <pdfihelper.hxx> +#include <imagecontainer.hxx> +#include "style.hxx" +#include "drawtreevisiting.hxx" +#include <genericelements.hxx> + +#include <basegfx/polygon/b2dpolypolygontools.hxx> +#include <osl/diagnose.h> +#include <rtl/math.hxx> +#include <com/sun/star/i18n/BreakIterator.hpp> +#include <com/sun/star/i18n/CharacterClassification.hpp> +#include <com/sun/star/i18n/ScriptType.hpp> +#include <com/sun/star/i18n/DirectionProperty.hpp> +#include <comphelper/string.hxx> + +#include <string.h> +#include <string_view> + +using namespace ::com::sun::star; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::uno; + +namespace pdfi +{ + +const Reference< XBreakIterator >& DrawXmlOptimizer::GetBreakIterator() +{ + if ( !mxBreakIter.is() ) + { + Reference< XComponentContext > xContext( m_rProcessor.m_xContext, uno::UNO_SET_THROW ); + mxBreakIter = BreakIterator::create(xContext); + } + return mxBreakIter; +} + +const Reference< XCharacterClassification >& DrawXmlEmitter::GetCharacterClassification() +{ + if ( !mxCharClass.is() ) + { + Reference< XComponentContext > xContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW ); + mxCharClass = CharacterClassification::create(xContext); + } + return mxCharClass; +} + +void DrawXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( elem.Children.empty() ) + return; + + const char* pType = dynamic_cast<DrawElement*>(elem.Children.front().get()) ? 
"draw:a" : "text:a"; + + PropertyMap aProps; + aProps[ "xlink:type" ] = "simple"; + aProps[ "xlink:href" ] = elem.URI; + aProps[ "office:target-frame-name" ] = "_blank"; + aProps[ "xlink:show" ] = "new"; + + m_rEmitContext.rEmitter.beginTag( pType, aProps ); + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + m_rEmitContext.rEmitter.endTag( pType ); +} + +void DrawXmlEmitter::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( elem.Text.isEmpty() ) + return; + + OUString strSpace(u' '); + OUString strNbSpace(u'\x00A0'); + OUString tabSpace(u'\x0009'); + PropertyMap aProps; + if( elem.StyleId != -1 ) + { + aProps[ OUString( "text:style-name" ) ] = + m_rEmitContext.rStyles.getStyleName( elem.StyleId ); + } + + OUString str(elem.Text.toString()); + + // Check for RTL + bool isRTL = false; + Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() ); + if( xCC.is() ) + { + for(int i=1; i< elem.Text.getLength(); i++) + { + css::i18n::DirectionProperty nType = static_cast<css::i18n::DirectionProperty>(xCC->getCharacterDirection( str, i )); + if ( nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT || + nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC || + nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING || + nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE + ) + isRTL = true; + } + } + + if (isRTL) // If so, reverse string + { + // First, produce mirrored-image for each code point which has the Bidi_Mirrored property. + str = PDFIProcessor::SubstituteBidiMirrored(str); + // Then, reverse the code points in the string, in backward order. 
+ str = ::comphelper::string::reverseCodePoints(str); + } + + m_rEmitContext.rEmitter.beginTag( "text:span", aProps ); + + aProps = {}; + for(int i=0; i< elem.Text.getLength(); i++) + { + OUString strToken= str.copy(i,1) ; + if( strSpace == strToken || strNbSpace == strToken ) + { + aProps[ "text:c" ] = "1"; + m_rEmitContext.rEmitter.beginTag( "text:s", aProps ); + m_rEmitContext.rEmitter.endTag( "text:s"); + } + else + { + if( tabSpace == strToken ) + { + m_rEmitContext.rEmitter.beginTag( "text:tab", aProps ); + m_rEmitContext.rEmitter.endTag( "text:tab"); + } + else + { + m_rEmitContext.rEmitter.write( strToken ); + } + } + } + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + m_rEmitContext.rEmitter.endTag( "text:span" ); +} + +void DrawXmlEmitter::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + PropertyMap aProps; + if( elem.StyleId != -1 ) + { + aProps[ "text:style-name" ] = m_rEmitContext.rStyles.getStyleName( elem.StyleId ); + } + const char* pTagType = "text:p"; + if( elem.Type == ParagraphElement::Headline ) + pTagType = "text:h"; + m_rEmitContext.rEmitter.beginTag( pTagType, aProps ); + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + m_rEmitContext.rEmitter.endTag( pTagType ); +} + +void DrawXmlEmitter::fillFrameProps( DrawElement& rElem, + PropertyMap& rProps, + const EmitContext& rEmitContext, + bool bWasTransformed + ) +{ + rProps[ "draw:z-index" ] = OUString::number( rElem.ZOrder ); + rProps[ "draw:style-name"] = rEmitContext.rStyles.getStyleName( rElem.StyleId ); + + if (rElem.IsForText) + rProps["draw:text-style-name"] = rEmitContext.rStyles.getStyleName(rElem.TextStyleId); + + const GraphicsContext& rGC = + 
rEmitContext.rProcessor.getGraphicsContext( rElem.GCId ); + + if (bWasTransformed) + { + rProps[ "svg:x" ] = convertPixelToUnitString(rElem.x); + rProps[ "svg:y" ] = convertPixelToUnitString(rElem.y); + rProps[ "svg:width" ] = convertPixelToUnitString(rElem.w); + rProps[ "svg:height" ] = convertPixelToUnitString(rElem.h); + } + else + { + OUStringBuffer aBuf(256); + + basegfx::B2DHomMatrix mat(rGC.Transformation); + + if (rElem.MirrorVertical) + { + basegfx::B2DHomMatrix mat2; + mat2.translate(0, -0.5); + mat2.scale(1, -1); + mat2.translate(0, 0.5); + mat = mat * mat2; + } + + double scale = convPx2mm(100); + mat.scale(scale, scale); + + aBuf.append("matrix("); + aBuf.append(mat.get(0, 0)); + aBuf.append(' '); + aBuf.append(mat.get(1, 0)); + aBuf.append(' '); + aBuf.append(mat.get(0, 1)); + aBuf.append(' '); + aBuf.append(mat.get(1, 1)); + aBuf.append(' '); + aBuf.append(mat.get(0, 2)); + aBuf.append(' '); + aBuf.append(mat.get(1, 2)); + aBuf.append(")"); + + rProps["draw:transform"] = aBuf.makeStringAndClear(); + } +} + +void DrawXmlEmitter::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( elem.Children.empty() ) + return; + + bool bTextBox = (dynamic_cast<ParagraphElement*>(elem.Children.front().get()) != nullptr); + PropertyMap aFrameProps; + fillFrameProps( elem, aFrameProps, m_rEmitContext, false ); + m_rEmitContext.rEmitter.beginTag( "draw:frame", aFrameProps ); + if( bTextBox ) + m_rEmitContext.rEmitter.beginTag( "draw:text-box", PropertyMap() ); + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + if( bTextBox ) + m_rEmitContext.rEmitter.endTag( "draw:text-box" ); + m_rEmitContext.rEmitter.endTag( "draw:frame" ); +} + +void DrawXmlEmitter::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + elem.updateGeometry(); + /* note: + * aw 
recommends using 100dth of mm in all respects since the xml import + * (a) is buggy (see issue 37213) + * (b) is optimized for 100dth of mm and does not scale itself then, + * this does not gain us speed but makes for smaller rounding errors since + * the xml importer coordinates are integer based + */ + for (sal_uInt32 i = 0; i< elem.PolyPoly.count(); i++) + { + basegfx::B2DPolygon b2dPolygon = elem.PolyPoly.getB2DPolygon( i ); + + for ( sal_uInt32 j = 0; j< b2dPolygon.count(); j++ ) + { + basegfx::B2DPoint point; + basegfx::B2DPoint nextPoint; + point = b2dPolygon.getB2DPoint( j ); + + basegfx::B2DPoint prevPoint = b2dPolygon.getPrevControlPoint( j ) ; + + point.setX( convPx2mmPrec2( point.getX() )*100.0 ); + point.setY( convPx2mmPrec2( point.getY() )*100.0 ); + + if ( b2dPolygon.isPrevControlPointUsed( j ) ) + { + prevPoint.setX( convPx2mmPrec2( prevPoint.getX() )*100.0 ); + prevPoint.setY( convPx2mmPrec2( prevPoint.getY() )*100.0 ); + } + + if ( b2dPolygon.isNextControlPointUsed( j ) ) + { + nextPoint = b2dPolygon.getNextControlPoint( j ) ; + nextPoint.setX( convPx2mmPrec2( nextPoint.getX() )*100.0 ); + nextPoint.setY( convPx2mmPrec2( nextPoint.getY() )*100.0 ); + } + + b2dPolygon.setB2DPoint( j, point ); + + if ( b2dPolygon.isPrevControlPointUsed( j ) ) + b2dPolygon.setPrevControlPoint( j , prevPoint ) ; + + if ( b2dPolygon.isNextControlPointUsed( j ) ) + b2dPolygon.setNextControlPoint( j , nextPoint ) ; + } + + elem.PolyPoly.setB2DPolygon( i, b2dPolygon ); + } + + PropertyMap aProps; + // PDFIProcessor transforms geometrical objects, not images and text + // so we need to tell fillFrameProps here that the transformation for + // a PolyPolyElement was already applied (aside from translation) + fillFrameProps( elem, aProps, m_rEmitContext, true ); + OUStringBuffer aBuf( 64 ); + aBuf.append( "0 0 " ); + aBuf.append( convPx2mmPrec2(elem.w)*100.0 ); + aBuf.append( ' ' ); + aBuf.append( convPx2mmPrec2(elem.h)*100.0 ); + aProps[ "svg:viewBox" ] = 
aBuf.makeStringAndClear(); + aProps[ "svg:d" ] = basegfx::utils::exportToSvgD( elem.PolyPoly, false, true, false ); + + m_rEmitContext.rEmitter.beginTag( "draw:path", aProps ); + m_rEmitContext.rEmitter.endTag( "draw:path" ); +} + +void DrawXmlEmitter::visit( ImageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + PropertyMap aImageProps; + m_rEmitContext.rEmitter.beginTag( "draw:image", aImageProps ); + m_rEmitContext.rEmitter.beginTag( "office:binary-data", PropertyMap() ); + m_rEmitContext.rImages.writeBase64EncodedStream( elem.Image, m_rEmitContext); + m_rEmitContext.rEmitter.endTag( "office:binary-data" ); + m_rEmitContext.rEmitter.endTag( "draw:image" ); +} + +void DrawXmlEmitter::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + PropertyMap aPageProps; + aPageProps[ "draw:master-page-name" ] = m_rEmitContext.rStyles.getStyleName( elem.StyleId ); + + m_rEmitContext.rEmitter.beginTag("draw:page", aPageProps); + + if( m_rEmitContext.xStatusIndicator.is() ) + m_rEmitContext.xStatusIndicator->setValue( elem.PageNumber ); + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + m_rEmitContext.rEmitter.endTag("draw:page"); +} + +void DrawXmlEmitter::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ + m_rEmitContext.rEmitter.beginTag( "office:body", PropertyMap() ); + m_rEmitContext.rEmitter.beginTag( m_bWriteDrawDocument ? "office:drawing" : "office:presentation", + PropertyMap() ); + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + m_rEmitContext.rEmitter.endTag( m_bWriteDrawDocument ? 
"office:drawing" : "office:presentation" ); + m_rEmitContext.rEmitter.endTag( "office:body" ); +} + + +void DrawXmlOptimizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void DrawXmlOptimizer::visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ +} + +void DrawXmlOptimizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + elem.applyToChildren(*this); +} + +void DrawXmlOptimizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void DrawXmlOptimizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& elemIt ) +{ + /* note: optimize two consecutive PolyPolyElements that + * have the same path but one of which is a stroke while + * the other is a fill + */ + if( !elem.Parent ) + return; + + // find following PolyPolyElement in parent's children list + if( elemIt == elem.Parent->Children.end() ) + return; + auto next_it = elemIt; + ++next_it; + if( next_it == elem.Parent->Children.end() ) + return; + + PolyPolyElement* pNext = dynamic_cast<PolyPolyElement*>(next_it->get()); + // TODO(F2): this comparison fails for OOo-generated polygons with beziers. 
+ if( !pNext || pNext->PolyPoly != elem.PolyPoly ) + return; + + const GraphicsContext& rNextGC = + m_rProcessor.getGraphicsContext( pNext->GCId ); + const GraphicsContext& rThisGC = + m_rProcessor.getGraphicsContext( elem.GCId ); + + if( !(rThisGC.BlendMode == rNextGC.BlendMode && + rThisGC.Flatness == rNextGC.Flatness && + rThisGC.Transformation == rNextGC.Transformation && + rThisGC.Clip == rNextGC.Clip && + rThisGC.FillColor.Red == rNextGC.FillColor.Red && + rThisGC.FillColor.Green== rNextGC.FillColor.Green && + rThisGC.FillColor.Blue == rNextGC.FillColor.Blue && + rThisGC.FillColor.Alpha== rNextGC.FillColor.Alpha && + pNext->Action == PATH_STROKE && + (elem.Action == PATH_FILL || elem.Action == PATH_EOFILL)) ) + return; + + GraphicsContext aGC = rThisGC; + aGC.LineJoin = rNextGC.LineJoin; + aGC.LineCap = rNextGC.LineCap; + aGC.LineWidth = rNextGC.LineWidth; + aGC.MiterLimit= rNextGC.MiterLimit; + aGC.DashArray = rNextGC.DashArray; + aGC.LineColor = rNextGC.LineColor; + elem.GCId = m_rProcessor.getGCId( aGC ); + + elem.Action |= pNext->Action; + + elem.Children.splice( elem.Children.end(), pNext->Children ); + elem.Parent->Children.erase(next_it); +} + +void DrawXmlOptimizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + optimizeTextElements( elem ); + + elem.applyToChildren(*this); +} + +void DrawXmlOptimizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( m_rProcessor.getStatusIndicator().is() ) + m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber ); + + // resolve hyperlinks + elem.resolveHyperlinks(); + + elem.resolveFontStyles( m_rProcessor ); // underlines and such + + // FIXME: until hyperlinks and font effects are adjusted for + // geometrical search handle them before sorting + PDFIProcessor::sortElements( &elem ); + + // find paragraphs in text + ParagraphElement* pCurPara = nullptr; + std::list< std::unique_ptr<Element> >::iterator 
page_element, next_page_element; + next_page_element = elem.Children.begin(); + double fCurLineHeight = 0.0; // average height of text items in current para + int nCurLineElements = 0; // number of line contributing elements in current para + double line_left = elem.w, line_right = 0.0; + double column_width = elem.w*0.75; // estimate text width + // TODO: guess columns + while( next_page_element != elem.Children.end() ) + { + page_element = next_page_element++; + ParagraphElement* pPagePara = dynamic_cast<ParagraphElement*>(page_element->get()); + if( pPagePara ) + { + pCurPara = pPagePara; + // adjust line height and text items + fCurLineHeight = 0.0; + nCurLineElements = 0; + for( const auto& rxChild : pCurPara->Children ) + { + TextElement* pTestText = dynamic_cast<TextElement*>(rxChild.get()); + if( pTestText ) + { + fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pTestText->h)/double(nCurLineElements+1); + nCurLineElements++; + } + } + continue; + } + + HyperlinkElement* pLink = dynamic_cast<HyperlinkElement*>(page_element->get()); + DrawElement* pDraw = dynamic_cast<DrawElement*>(page_element->get()); + if( ! pDraw && pLink && ! pLink->Children.empty() ) + pDraw = dynamic_cast<DrawElement*>(pLink->Children.front().get() ); + if( pDraw ) + { + // insert small drawing objects as character, else leave them page bound + + bool bInsertToParagraph = false; + // first check if this is either inside the paragraph + if( pCurPara && pDraw->y < pCurPara->y + pCurPara->h ) + { + if( pDraw->h < fCurLineHeight * 1.5 ) + { + bInsertToParagraph = true; + fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pDraw->h)/double(nCurLineElements+1); + nCurLineElements++; + // mark draw element as character + pDraw->isCharacter = true; + } + } + // or perhaps the draw element begins a new paragraph + else if( next_page_element != elem.Children.end() ) + { + TextElement* pText = dynamic_cast<TextElement*>(next_page_element->get()); + if( ! 
pText ) + { + ParagraphElement* pPara = dynamic_cast<ParagraphElement*>(next_page_element->get()); + if( pPara && ! pPara->Children.empty() ) + pText = dynamic_cast<TextElement*>(pPara->Children.front().get()); + } + if( pText && // check there is a text + pDraw->h < pText->h*1.5 && // and it is approx the same height + // and either upper or lower edge of pDraw is inside text's vertical range + ( ( pDraw->y >= pText->y && pDraw->y <= pText->y+pText->h ) || + ( pDraw->y+pDraw->h >= pText->y && pDraw->y+pDraw->h <= pText->y+pText->h ) + ) + ) + { + bInsertToParagraph = true; + fCurLineHeight = pDraw->h; + nCurLineElements = 1; + line_left = pDraw->x; + line_right = pDraw->x + pDraw->w; + // begin a new paragraph + pCurPara = nullptr; + // mark draw element as character + pDraw->isCharacter = true; + } + } + + if( ! bInsertToParagraph ) + { + pCurPara = nullptr; + continue; + } + } + + TextElement* pText = dynamic_cast<TextElement*>(page_element->get()); + if( ! pText && pLink && ! pLink->Children.empty() ) + pText = dynamic_cast<TextElement*>(pLink->Children.front().get()); + if( pText ) + { + Element* pGeo = pLink ? 
static_cast<Element*>(pLink) : + static_cast<Element*>(pText); + if( pCurPara ) + { + // there was already a text element, check for a new paragraph + if( nCurLineElements > 0 ) + { + // if the new text is significantly distant from the paragraph + // begin a new paragraph + if( pGeo->y > pCurPara->y + pCurPara->h + fCurLineHeight*0.5 ) + pCurPara = nullptr; // insert new paragraph + else if( pGeo->y > (pCurPara->y+pCurPara->h - fCurLineHeight*0.05) ) + { + // new paragraph if either the last line of the paragraph + // was significantly shorter than the paragraph as a whole + if( (line_right - line_left) < pCurPara->w*0.75 ) + pCurPara = nullptr; + // or the last line was significantly smaller than the column width + else if( (line_right - line_left) < column_width*0.75 ) + pCurPara = nullptr; + } + } + + + } + + + // update line height/width + if( pCurPara ) + { + fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pGeo->h)/double(nCurLineElements+1); + nCurLineElements++; + if( pGeo->x < line_left ) + line_left = pGeo->x; + if( pGeo->x+pGeo->w > line_right ) + line_right = pGeo->x+pGeo->w; + } + else + { + fCurLineHeight = pGeo->h; + nCurLineElements = 1; + line_left = pGeo->x; + line_right = pGeo->x + pGeo->w; + } + } + + + // move element to current paragraph + if (! 
pCurPara ) // new paragraph, insert one + { + pCurPara = ElementFactory::createParagraphElement( nullptr ); + // set parent + pCurPara->Parent = &elem; + //insert new paragraph before current element + page_element = elem.Children.insert( page_element, std::unique_ptr<Element>(pCurPara) ); + // forward iterator to current element again + ++ page_element; + // update next_element which is now invalid + next_page_element = page_element; + ++ next_page_element; + } + Element* pCurEle = page_element->get(); + Element::setParent( page_element, pCurPara ); + OSL_ENSURE( !pText || pCurEle == pText || pCurEle == pLink, "paragraph child list in disorder" ); + if( pText || pDraw ) + pCurPara->updateGeometryWith( pCurEle ); + } + + // process children + elem.applyToChildren(*this); +} + +static bool isSpaces(TextElement* pTextElem) +{ + for (sal_Int32 i = 0; i != pTextElem->Text.getLength(); ++i) { + if (pTextElem->Text[i] != ' ') { + return false; + } + } + return true; +} + +void DrawXmlOptimizer::optimizeTextElements(Element& rParent) +{ + if( rParent.Children.empty() ) // this should not happen + { + OSL_FAIL( "empty paragraph optimized" ); + return; + } + + // concatenate child elements with same font id + auto next = rParent.Children.begin(); + auto it = next++; + + while( next != rParent.Children.end() ) + { + bool bConcat = false; + TextElement* pCur = dynamic_cast<TextElement*>(it->get()); + + if( pCur ) + { + TextElement* pNext = dynamic_cast<TextElement*>(next->get()); + bool isComplex = false; + OUString str(pCur->Text.toString()); + for(int i=0; i< str.getLength(); i++) + { + sal_Int16 nType = GetBreakIterator()->getScriptType( str, i ); + if (nType == css::i18n::ScriptType::COMPLEX) + isComplex = true; + } + bool bPara = strspn("ParagraphElement", typeid(rParent).name()); + ParagraphElement* pPara = dynamic_cast<ParagraphElement*>(&rParent); + if (bPara && pPara && isComplex) + pPara->bRtl = true; + if( pNext ) + { + const GraphicsContext& rCurGC = 
m_rProcessor.getGraphicsContext( pCur->GCId ); + const GraphicsContext& rNextGC = m_rProcessor.getGraphicsContext( pNext->GCId ); + + // line and space optimization; works only in strictly horizontal mode + + // concatenate consecutive text elements unless there is a + // font or text color change, leave a new span in that case + if( (pCur->FontId == pNext->FontId || isSpaces(pNext)) && + rCurGC.FillColor.Red == rNextGC.FillColor.Red && + rCurGC.FillColor.Green == rNextGC.FillColor.Green && + rCurGC.FillColor.Blue == rNextGC.FillColor.Blue && + rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha + ) + { + pCur->updateGeometryWith( pNext ); + // pPara is null when rParent is not a ParagraphElement (this function + // recurses into HyperlinkElement parents), so guard the dereference. + if (pPara && pPara->bRtl) + { + // Tdf#152083: If RTL, reverse the text in pNext so that its correct order is + // restored when the combined text is reversed in DrawXmlEmitter::visit. + OUString tempStr; + bool bNeedReverse=false; + str = pNext->Text.toString(); + for (sal_Int32 i=0; i < str.getLength(); i++) + { + if (str[i] == u' ') + { // Space char (e.g. the space as in " م") needs special treatment. + // First, append the space char to pCur. + pCur->Text.append(OUStringChar(str[i])); + // Then, check whether tempStr needs reverse, if so then reverse and append. 
+ if (bNeedReverse) + { + tempStr = ::comphelper::string::reverseCodePoints(tempStr); + pCur->Text.append(tempStr); + tempStr = u""; + } + bNeedReverse = false; + } + else + { + tempStr += OUStringChar(str[i]); + bNeedReverse = true; + } + } + // Do the last append + if (bNeedReverse) + { + tempStr = ::comphelper::string::reverseCodePoints(tempStr); + pCur->Text.append(tempStr); + } + else + { + pCur->Text.append(tempStr); + } + } + else + { + // append text to current element directly without reverse + pCur->Text.append( pNext->Text ); + } + + str = pCur->Text.toString(); + for(int i=0; i< str.getLength(); i++) + { + sal_Int16 nType = GetBreakIterator()->getScriptType( str, i ); + if (nType == css::i18n::ScriptType::COMPLEX) + isComplex = true; + } + if (bPara && pPara && isComplex) + pPara->bRtl = true; + // append eventual children to current element + // and clear children (else the children just + // appended to pCur would be destroyed) + pCur->Children.splice( pCur->Children.end(), pNext->Children ); + // get rid of the now useless element + rParent.Children.erase( next ); + bConcat = true; + } + } + } + else if( dynamic_cast<HyperlinkElement*>(it->get()) ) + optimizeTextElements( **it ); + if ( bConcat ) + next = it; + else + ++it; + ++next; + } +} + +void DrawXmlOptimizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ + elem.applyToChildren(*this); +} + + +void DrawXmlFinalizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + // xxx TODO copied from DrawElement + const GraphicsContext& rGC = m_rProcessor.getGraphicsContext(elem.GCId ); + + PropertyMap aProps; + aProps[ "style:family" ] = "graphic"; + aProps[ "style:parent-style-name" ] = "standard"; + // generate standard graphic style if necessary + m_rStyleContainer.getStandardStyleId( "graphic" ); + + PropertyMap aGCProps; + if (elem.Action & PATH_STROKE) + { + double scale = 
GetAverageTransformationScale(rGC.Transformation); + if (rGC.DashArray.size() < 2) + { + aGCProps[ "draw:stroke" ] = "solid"; + } + else + { + PropertyMap props; + FillDashStyleProps(props, rGC.DashArray, scale); + StyleContainer::Style style("draw:stroke-dash", std::move(props)); + + aGCProps[ "draw:stroke" ] = "dash"; + aGCProps[ "draw:stroke-dash" ] = + m_rStyleContainer.getStyleName( + m_rStyleContainer.getStyleId(style)); + } + + aGCProps[ "svg:stroke-color" ] = getColorString(rGC.LineColor); + if (rGC.LineColor.Alpha != 1.0) + aGCProps["svg:stroke-opacity"] = getPercentString(rGC.LineColor.Alpha * 100.0); + aGCProps[ "svg:stroke-width" ] = convertPixelToUnitString(rGC.LineWidth * scale); + aGCProps[ "draw:stroke-linejoin" ] = rGC.GetLineJoinString(); + aGCProps[ "svg:stroke-linecap" ] = rGC.GetLineCapString(); + } + else + { + aGCProps[ "draw:stroke" ] = "none"; + } + + // TODO(F1): check whether stuff could be emulated by gradient/bitmap/hatch + if( elem.Action & (PATH_FILL | PATH_EOFILL) ) + { + aGCProps[ "draw:fill" ] = "solid"; + aGCProps[ "draw:fill-color" ] = getColorString(rGC.FillColor); + if (rGC.FillColor.Alpha != 1.0) + aGCProps["draw:opacity"] = getPercentString(rGC.FillColor.Alpha * 100.0); + } + else + { + aGCProps[ "draw:fill" ] = "none"; + } + + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:graphic-properties", std::move(aGCProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); +} + +void DrawXmlFinalizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +static void SetFontsizeProperties(PropertyMap& props, double fontSize) +{ + OUString aFSize = OUString::number(fontSize * 72 / PDFI_OUTDEV_RESOLUTION) + "pt"; + props["fo:font-size"] = aFSize; + props["style:font-size-asian"] = aFSize; + props["style:font-size-complex"] = aFSize; +} + +void DrawXmlFinalizer::visit( TextElement& 
elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + const FontAttributes& rFont = m_rProcessor.getFont( elem.FontId ); + PropertyMap aProps; + aProps[ "style:family" ] = "text"; + + PropertyMap aFontProps; + + // family name + // TODO: tdf#143095: use system font name rather than PSName + SAL_INFO("sdext.pdfimport", "The font used in xml is: " << rFont.familyName); + aFontProps[ "fo:font-family" ] = rFont.familyName; + aFontProps[ "style:font-family-asian" ] = rFont.familyName; + aFontProps[ "style:font-family-complex" ] = rFont.familyName; + + // bold + aFontProps[ "fo:font-weight" ] = rFont.fontWeight; + aFontProps[ "style:font-weight-asian" ] = rFont.fontWeight; + aFontProps[ "style:font-weight-complex" ] = rFont.fontWeight; + + // italic + if( rFont.isItalic ) + { + aFontProps[ "fo:font-style" ] = "italic"; + aFontProps[ "style:font-style-asian" ] = "italic"; + aFontProps[ "style:font-style-complex" ] = "italic"; + } + + // underline + if( rFont.isUnderline ) + { + aFontProps[ "style:text-underline-style" ] = "solid"; + aFontProps[ "style:text-underline-width" ] = "auto"; + aFontProps[ "style:text-underline-color" ] = "font-color"; + } + + // outline + if( rFont.isOutline ) + aFontProps[ "style:text-outline" ] = "true"; + + // size + SetFontsizeProperties(aFontProps, rFont.size); + + // color + const GraphicsContext& rGC = m_rProcessor.getGraphicsContext( elem.GCId ); + aFontProps[ "fo:color" ] = getColorString( rFont.isOutline ? 
rGC.LineColor : rGC.FillColor ); + + // scale + double fRotate, fShearX; + basegfx::B2DTuple aScale, aTranslation; + rGC.Transformation.decompose(aScale, aTranslation, fRotate, fShearX); + double textScale = 100 * aScale.getX() / aScale.getY(); + if (((textScale >= 1) && (textScale <= 99)) || + ((textScale >= 101) && (textScale <= 999))) + { + aFontProps[ "style:text-scale" ] = getPercentString(textScale); + } + + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:text-properties", std::move(aFontProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); +} + +void DrawXmlFinalizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + + PropertyMap aProps; + aProps[ "style:family" ] = "paragraph"; + // generate standard paragraph style if necessary + m_rStyleContainer.getStandardStyleId( "paragraph" ); + + PropertyMap aParProps; + + aParProps[ "fo:text-align"] = "start"; + if (elem.bRtl) + aParProps[ "style:writing-mode"] = "rl-tb"; + else + aParProps[ "style:writing-mode"] = "lr-tb"; + + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:paragraph-properties", std::move(aParProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); + + elem.applyToChildren(*this); +} + +void DrawXmlFinalizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ + PropertyMap props1; + props1[ "style:family" ] = "graphic"; + props1[ "style:parent-style-name" ] = "standard"; + // generate standard graphic style if necessary + m_rStyleContainer.getStandardStyleId( "graphic" ); + + PropertyMap aGCProps; + + aGCProps[ "draw:stroke" ] = "none"; + aGCProps[ "draw:fill" ] = "none"; + aGCProps[ "draw:auto-grow-height" ] = "true"; + aGCProps[ "draw:auto-grow-width" ] = "true"; + aGCProps[ 
"draw:textarea-horizontal-align" ] = "left"; + aGCProps[ "draw:textarea-vertical-align" ] = "top"; + aGCProps[ "fo:min-height"] = "0cm"; + aGCProps[ "fo:min-width"] = "0cm"; + aGCProps[ "fo:padding-top" ] = "0cm"; + aGCProps[ "fo:padding-left" ] = "0cm"; + aGCProps[ "fo:padding-right" ] = "0cm"; + aGCProps[ "fo:padding-bottom" ] = "0cm"; + + StyleContainer::Style style1( "style:style", std::move(props1) ); + StyleContainer::Style subStyle1( "style:graphic-properties", std::move(aGCProps) ); + style1.SubStyles.push_back(&subStyle1); + + elem.StyleId = m_rStyleContainer.getStyleId(style1); + + if (elem.IsForText) + { + PropertyMap props2; + props2["style:family"] = "paragraph"; + + PropertyMap textProps; + SetFontsizeProperties(textProps, elem.FontSize); + + StyleContainer::Style style2("style:style", std::move(props2)); + StyleContainer::Style subStyle2("style:text-properties", std::move(textProps)); + style2.SubStyles.push_back(&subStyle2); + elem.TextStyleId = m_rStyleContainer.getStyleId(style2); + } + + elem.applyToChildren(*this); +} + +void DrawXmlFinalizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void DrawXmlFinalizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( m_rProcessor.getStatusIndicator().is() ) + m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber ); + + // transform from pixel to mm + double page_width = convPx2mm( elem.w ), page_height = convPx2mm( elem.h ); + + // calculate page margins out of the relevant children (paragraphs) + elem.TopMargin = elem.h; + elem.BottomMargin = 0; + elem.LeftMargin = elem.w; + elem.RightMargin = 0; + + for( const auto& rxChild : elem.Children ) + { + if( rxChild->x < elem.LeftMargin ) + elem.LeftMargin = rxChild->x; + if( rxChild->y < elem.TopMargin ) + elem.TopMargin = rxChild->y; + if( rxChild->x + rxChild->w > elem.RightMargin ) + elem.RightMargin = (rxChild->x + rxChild->w); + if( rxChild->y + 
rxChild->h > elem.BottomMargin ) + elem.BottomMargin = (rxChild->y + rxChild->h); + } + + // transform margins to mm + double left_margin = convPx2mm( elem.LeftMargin ); + double right_margin = convPx2mm( elem.RightMargin ); + double top_margin = convPx2mm( elem.TopMargin ); + double bottom_margin = convPx2mm( elem.BottomMargin ); + + // round left/top margin to nearest mm + left_margin = rtl_math_round( left_margin, 0, rtl_math_RoundingMode_Floor ); + top_margin = rtl_math_round( top_margin, 0, rtl_math_RoundingMode_Floor ); + // round (fuzzy) right/bottom margin to nearest cm + right_margin = rtl_math_round( right_margin, right_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor ); + bottom_margin = rtl_math_round( bottom_margin, bottom_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor ); + + // set reasonable default in case of way too large margins + // e.g. no paragraph case + if( left_margin > page_width/2.0 - 10 ) + left_margin = 10; + if( right_margin > page_width/2.0 - 10 ) + right_margin = 10; + if( top_margin > page_height/2.0 - 10 ) + top_margin = 10; + if( bottom_margin > page_height/2.0 - 10 ) + bottom_margin = 10; + + // catch the weird cases + if( left_margin < 0 ) + left_margin = 0; + if( right_margin < 0 ) + right_margin = 0; + if( top_margin < 0 ) + top_margin = 0; + if( bottom_margin < 0 ) + bottom_margin = 0; + + // widely differing margins are unlikely to be correct + if( right_margin > left_margin*1.5 ) + right_margin = left_margin; + + elem.LeftMargin = convmm2Px( left_margin ); + elem.RightMargin = convmm2Px( right_margin ); + elem.TopMargin = convmm2Px( top_margin ); + elem.BottomMargin = convmm2Px( bottom_margin ); + + // get styles for paragraphs + PropertyMap aPageProps; + PropertyMap aPageLayoutProps; + aPageLayoutProps[ "fo:margin-top" ] = unitMMString( top_margin ); + aPageLayoutProps[ "fo:margin-bottom" ] = unitMMString( bottom_margin ); + aPageLayoutProps[ "fo:margin-left" ] = unitMMString( left_margin ); + aPageLayoutProps[ 
"fo:margin-right" ] = unitMMString( right_margin ); + aPageLayoutProps[ "fo:page-width" ] = unitMMString( page_width ); + aPageLayoutProps[ "fo:page-height" ] = unitMMString( page_height ); + aPageLayoutProps[ "style:print-orientation" ]= elem.w < elem.h ? std::u16string_view(u"portrait") : std::u16string_view(u"landscape"); + aPageLayoutProps[ "style:writing-mode" ]= "lr-tb"; + + StyleContainer::Style aStyle( "style:page-layout", std::move(aPageProps)); + StyleContainer::Style aSubStyle( "style:page-layout-properties", std::move(aPageLayoutProps)); + aStyle.SubStyles.push_back(&aSubStyle); + sal_Int32 nPageStyle = m_rStyleContainer.impl_getStyleId( aStyle, false ); + + // create master page + OUString aMasterPageLayoutName = m_rStyleContainer.getStyleName( nPageStyle ); + aPageProps[ "style:page-layout-name" ] = aMasterPageLayoutName; + + StyleContainer::Style aMPStyle( "style:master-page", std::move(aPageProps)); + + elem.StyleId = m_rStyleContainer.impl_getStyleId( aMPStyle,false ); + + // create styles for children + elem.applyToChildren(*this); +} + +void DrawXmlFinalizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + elem.applyToChildren(*this); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/drawtreevisiting.hxx b/sdext/source/pdfimport/tree/drawtreevisiting.hxx new file mode 100644 index 000000000..81bfd9273 --- /dev/null +++ b/sdext/source/pdfimport/tree/drawtreevisiting.hxx @@ -0,0 +1,117 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_DRAWTREEVISITING_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_DRAWTREEVISITING_HXX + +#include <treevisiting.hxx> + +#include <com/sun/star/i18n/XBreakIterator.hpp> +#include <com/sun/star/i18n/XCharacterClassification.hpp> +#include <com/sun/star/lang/XMultiServiceFactory.hpp> +#include <com/sun/star/uno/XComponentContext.hpp> + +namespace pdfi +{ + struct DrawElement; + + class DrawXmlOptimizer : public ElementTreeVisitor + { + private: + PDFIProcessor& m_rProcessor; + css::uno::Reference< css::i18n::XBreakIterator > mxBreakIter; + + void optimizeTextElements(Element& rParent); + + public: + const css::uno::Reference< css::i18n::XBreakIterator >& GetBreakIterator(); + explicit DrawXmlOptimizer(PDFIProcessor& rProcessor) : + m_rProcessor(rProcessor) + {} + + virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ImageElement&, const std::list< 
std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + }; + + class DrawXmlFinalizer : public ElementTreeVisitor + { + private: + StyleContainer& m_rStyleContainer; + PDFIProcessor& m_rProcessor; + + public: + explicit DrawXmlFinalizer(StyleContainer& rStyleContainer, + PDFIProcessor& rProcessor) : + m_rStyleContainer(rStyleContainer), + m_rProcessor(rProcessor) + {} + + virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + }; + + class DrawXmlEmitter : public ElementTreeVisitor + { + private: + css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass; + + EmitContext& m_rEmitContext ; + /// writes Impress doc when false + const bool m_bWriteDrawDocument; + + static void fillFrameProps( DrawElement& rElem, + PropertyMap& rProps, + const EmitContext& rEmitContext, + bool bWasTransformed + ); + + public: + const css::uno::Reference< css::i18n::XCharacterClassification >& GetCharacterClassification(); + enum 
DocType{ DRAW_DOC, IMPRESS_DOC }; + explicit DrawXmlEmitter(EmitContext& rEmitContext, DocType eDocType) : + m_rEmitContext(rEmitContext), + m_bWriteDrawDocument(eDocType==DRAW_DOC) + {} + + virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + }; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/genericelements.cxx b/sdext/source/pdfimport/tree/genericelements.cxx new file mode 100644 index 000000000..2c394c973 --- /dev/null +++ b/sdext/source/pdfimport/tree/genericelements.cxx @@ -0,0 +1,434 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +#include <genericelements.hxx> +#include <pdfiprocessor.hxx> +#include <pdfihelper.hxx> + + +#include <basegfx/polygon/b2dpolypolygontools.hxx> +#include <basegfx/range/b2drange.hxx> +#include <sal/log.hxx> + +namespace pdfi +{ + +Element::~Element() +{ +} + +void Element::applyToChildren( ElementTreeVisitor& rVisitor ) +{ + for( auto it = Children.begin(); it != Children.end(); ++it ) + (*it)->visitedBy( rVisitor, it ); +} + +void Element::setParent( std::list<std::unique_ptr<Element>>::iterator const & el, Element* pNewParent ) +{ + if( pNewParent ) + { + pNewParent->Children.splice( pNewParent->Children.end(), (*el)->Parent->Children, el ); + (*el)->Parent = pNewParent; + } +} + +void Element::updateGeometryWith( const Element* pMergeFrom ) +{ + if( w == 0 && h == 0 ) + { + x = pMergeFrom->x; + y = pMergeFrom->y; + w = pMergeFrom->w; + h = pMergeFrom->h; + } + else + { + if( pMergeFrom->x < x ) + { + w += x - pMergeFrom->x; + x = pMergeFrom->x; + } + if( pMergeFrom->x+pMergeFrom->w > x+w ) + w = pMergeFrom->w+pMergeFrom->x - x; + if( pMergeFrom->y < y ) + { + h += y - pMergeFrom->y; + y = pMergeFrom->y; + } + if( pMergeFrom->y+pMergeFrom->h > y+h ) + h = pMergeFrom->h+pMergeFrom->y - y; + } +} + + +#if OSL_DEBUG_LEVEL > 0 +#include <typeinfo> +void Element::emitStructure( int nLevel) +{ + SAL_INFO( "sdext", std::string(nLevel, ' ') << "<" << typeid( *this ).name() << " " << this << "> (" + << std::setprecision(1) << x << "," << y << ")+(" << w << "x" << h << ")" ); + for (auto const& child : Children) + child->emitStructure(nLevel+1); + SAL_INFO( "sdext", std::string(nLevel, ' ') << "</" << 
typeid( *this ).name() << ">" ); +} +#endif + +void ListElement::visitedBy( ElementTreeVisitor& visitor, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + // this is only an inner node + applyToChildren(visitor); +} + +void HyperlinkElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) +{ + rVisitor.visit(*this,rParentIt); +} + +void TextElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) +{ + rVisitor.visit(*this,rParentIt); +} + +void FrameElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) +{ + rVisitor.visit(*this,rParentIt); +} + +void ImageElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt) +{ + rVisitor.visit( *this, rParentIt); +} + +PolyPolyElement::PolyPolyElement( Element* pParent, + sal_Int32 nGCId, + const basegfx::B2DPolyPolygon& rPolyPoly, + sal_Int8 nAction ) + : DrawElement( pParent, nGCId ), + PolyPoly( rPolyPoly ), + Action( nAction ) +{ +} + +void PolyPolyElement::updateGeometry() +{ + basegfx::B2DRange aRange; + if( PolyPoly.areControlPointsUsed() ) + aRange = basegfx::utils::getRange( basegfx::utils::adaptiveSubdivideByAngle( PolyPoly ) ); + else + aRange = basegfx::utils::getRange( PolyPoly ); + x = aRange.getMinX(); + y = aRange.getMinY(); + w = aRange.getWidth(); + h = aRange.getHeight(); + + // fdo#32330 - non-closed paths will not show up filled in LibO + if( Action & (PATH_FILL | PATH_EOFILL) ) + PolyPoly.setClosed(true); +} + +void PolyPolyElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt) +{ + rVisitor.visit( *this, rParentIt); +} + +#if OSL_DEBUG_LEVEL > 0 +void PolyPolyElement::emitStructure( int nLevel) +{ + SAL_INFO( "sdext", std::string(nLevel, ' ') << "<" 
<< typeid( *this ).name() << " " << this << ">" ); + SAL_INFO( "sdext", "path=" ); + int nPoly = PolyPoly.count(); + for( int i = 0; i < nPoly; i++ ) + { + OUStringBuffer buff; + basegfx::B2DPolygon aPoly = PolyPoly.getB2DPolygon( i ); + int nPoints = aPoly.count(); + for( int n = 0; n < nPoints; n++ ) + { + basegfx::B2DPoint aPoint = aPoly.getB2DPoint( n ); + buff.append( " (" + OUString::number(aPoint.getX()) + "," + OUString::number(aPoint.getY()) + ")"); + } + SAL_INFO( "sdext", " " << buff.makeStringAndClear() ); + } + for (auto const& child : Children) + child->emitStructure( nLevel+1 ); + SAL_INFO( "sdext", std::string(nLevel, ' ') << "</" << typeid( *this ).name() << ">"); +} +#endif + +void ParagraphElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) +{ + rVisitor.visit(*this,rParentIt); +} + +bool ParagraphElement::isSingleLined( PDFIProcessor const & rProc ) const +{ + TextElement* pText = nullptr, *pLastText = nullptr; + for( auto& rxChild : Children ) + { + // a paragraph containing subparagraphs cannot be single lined + if( dynamic_cast< ParagraphElement* >(rxChild.get()) != nullptr ) + return false; + + pText = dynamic_cast< TextElement* >(rxChild.get()); + if( pText ) + { + const FontAttributes& rFont = rProc.getFont( pText->FontId ); + if( pText->h > rFont.size*1.5 ) + return false; + if( pLastText ) + { + if( pText->y > pLastText->y+pLastText->h || + pLastText->y > pText->y+pText->h ) + return false; + } + else + pLastText = pText; + } + } + + // a paragraph without a single text is not considered single lined + return pLastText != nullptr; +} + +double ParagraphElement::getLineHeight( PDFIProcessor& rProc ) const +{ + double line_h = 0; + for( auto& rxChild : Children ) + { + ParagraphElement* pPara = dynamic_cast< ParagraphElement* >(rxChild.get()); + TextElement* pText = nullptr; + if( pPara ) + { + double lh = pPara->getLineHeight( rProc ); + if( lh > line_h ) + line_h = 
lh; + } + else if( (pText = dynamic_cast< TextElement* >( rxChild.get() )) != nullptr ) + { + const FontAttributes& rFont = rProc.getFont( pText->FontId ); + double lh = pText->h; + if( pText->h > rFont.size*1.5 ) + lh = rFont.size; + if( lh > line_h ) + line_h = lh; + } + } + return line_h; +} + +TextElement* ParagraphElement::getFirstTextChild() const +{ + TextElement* pText = nullptr; + auto it = std::find_if(Children.begin(), Children.end(), + [](const std::unique_ptr<Element>& rxElem) { return dynamic_cast<TextElement*>(rxElem.get()) != nullptr; }); + if (it != Children.end()) + pText = dynamic_cast<TextElement*>(it->get()); + return pText; +} + +PageElement::~PageElement() +{ +} + +void PageElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) +{ + rVisitor.visit(*this, rParentIt); +} + +bool PageElement::resolveHyperlink( const std::list<std::unique_ptr<Element>>::iterator& link_it, std::list<std::unique_ptr<Element>>& rElements ) +{ + HyperlinkElement* pLink = dynamic_cast<HyperlinkElement*>(link_it->get()); + if( ! pLink ) // sanity check + return false; + + for( auto it = rElements.begin(); it != rElements.end(); ++it ) + { + if( (*it)->x >= pLink->x && (*it)->x + (*it)->w <= pLink->x + pLink->w && + (*it)->y >= pLink->y && (*it)->y + (*it)->h <= pLink->y + pLink->h ) + { + TextElement* pText = dynamic_cast<TextElement*>(it->get()); + if( pText ) + { + if( pLink->Children.empty() ) + { + // insert the hyperlink before the frame + rElements.splice( it, Hyperlinks.Children, link_it ); + pLink->Parent = (*it)->Parent; + } + // move text element into hyperlink + auto next = it; + ++next; + Element::setParent( it, pLink ); + it = next; + --it; + continue; + } + // a link can contain multiple text elements or a single frame + if( ! 
pLink->Children.empty() ) + continue; + if( dynamic_cast<ParagraphElement*>(it->get()) ) + { + if( resolveHyperlink( link_it, (*it)->Children ) ) + break; + continue; + } + FrameElement* pFrame = dynamic_cast<FrameElement*>(it->get()); + if( pFrame ) + { + // insert the hyperlink before the frame + rElements.splice( it, Hyperlinks.Children, link_it ); + pLink->Parent = (*it)->Parent; + // move frame into hyperlink + Element::setParent( it, pLink ); + break; + } + } + } + return ! pLink->Children.empty(); +} + +void PageElement::resolveHyperlinks() +{ + while( ! Hyperlinks.Children.empty() ) + { + if( ! resolveHyperlink( Hyperlinks.Children.begin(), Children ) ) + { + Hyperlinks.Children.pop_front(); + } + } +} + +void PageElement::resolveFontStyles( PDFIProcessor const & rProc ) +{ + resolveUnderlines(rProc); +} + +void PageElement::resolveUnderlines( PDFIProcessor const & rProc ) +{ + // FIXME: currently the algorithm used is quadratic + // this could be solved by some sorting beforehand + + auto poly_it = Children.begin(); + while( poly_it != Children.end() ) + { + PolyPolyElement* pPoly = dynamic_cast< PolyPolyElement* >(poly_it->get()); + if( ! pPoly || ! 
pPoly->Children.empty() ) + { + ++poly_it; + continue; + } + /* check for: no filling + * only two points (FIXME: handle small rectangles, too) + * y coordinates of points are equal + */ + if( pPoly->Action != PATH_STROKE ) + { + ++poly_it; + continue; + } + if( pPoly->PolyPoly.count() != 1 ) + { + ++poly_it; + continue; + } + + bool bRemovePoly = false; + basegfx::B2DPolygon aPoly = pPoly->PolyPoly.getB2DPolygon(0); + if( aPoly.count() != 2 || + aPoly.getB2DPoint(0).getY() != aPoly.getB2DPoint(1).getY() ) + { + ++poly_it; + continue; + } + double l_x = aPoly.getB2DPoint(0).getX(); + double r_x = aPoly.getB2DPoint(1).getX(); + double u_y; + if( r_x < l_x ) + { + u_y = r_x; r_x = l_x; l_x = u_y; + } + u_y = aPoly.getB2DPoint(0).getY(); + for( const auto& rxChild : Children ) + { + Element* pEle = rxChild.get(); + if( pEle->y <= u_y && pEle->y + pEle->h*1.1 >= u_y ) + { + // first: is the element underlined completely ? + if( pEle->x + pEle->w*0.1 >= l_x && + pEle->x + pEle->w*0.9 <= r_x ) + { + TextElement* pText = dynamic_cast< TextElement* >(pEle); + if( pText ) + { + const GraphicsContext& rTextGC = rProc.getGraphicsContext( pText->GCId ); + if( ! 
rTextGC.isRotatedOrSkewed() ) + { + bRemovePoly = true; + // retrieve ID for modified font + FontAttributes aAttr = rProc.getFont( pText->FontId ); + aAttr.isUnderline = true; + pText->FontId = rProc.getFontId( aAttr ); + } + } + else if( dynamic_cast< HyperlinkElement* >(pEle) ) + bRemovePoly = true; + } + // second: hyperlinks may be larger than their underline + // since they are just arbitrary rectangles in the action definition + else if( dynamic_cast< HyperlinkElement* >(pEle) != nullptr && + l_x >= pEle->x && r_x <= pEle->x+pEle->w ) + { + bRemovePoly = true; + } + } + } + if( bRemovePoly ) + { + auto next_it = poly_it; + ++next_it; + Children.erase( poly_it ); + poly_it = next_it; + } + else + ++poly_it; + } +} + +DocumentElement::~DocumentElement() +{ +} + +void DocumentElement::visitedBy( ElementTreeVisitor& rVisitor, + const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt) +{ + rVisitor.visit(*this, rParentIt); +} + + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/imagecontainer.cxx b/sdext/source/pdfimport/tree/imagecontainer.cxx new file mode 100644 index 000000000..a7154164d --- /dev/null +++ b/sdext/source/pdfimport/tree/imagecontainer.cxx @@ -0,0 +1,146 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +#include <imagecontainer.hxx> +#include <genericelements.hxx> +#include <xmlemitter.hxx> + +#include <rtl/ustrbuf.hxx> +#include <sal/log.hxx> +#include <o3tl/safeint.hxx> +#include <osl/diagnose.h> + +#include <com/sun/star/beans/PropertyValue.hpp> + +using namespace com::sun::star; + +namespace pdfi +{ + +namespace +{ + +const char aBase64EncodeTable[] = + { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' }; + +OUString encodeBase64( const sal_Int8* i_pBuffer, const sal_Int32 i_nBufferLength ) +{ + OUStringBuffer aBuf( (i_nBufferLength+1) * 4 / 3 ); + const sal_Int32 nRemain(i_nBufferLength%3); + const sal_Int32 nFullTripleLength( i_nBufferLength - (i_nBufferLength%3)); + sal_Int32 nBufPos( 0 ); + for( sal_Int32 i = 0; i < nFullTripleLength; i += 3, nBufPos += 4 ) + { + const sal_Int32 nBinary = (static_cast<sal_uInt8>(i_pBuffer[i + 0]) << 16) + + (static_cast<sal_uInt8>(i_pBuffer[i + 1]) << 8) + + static_cast<sal_uInt8>(i_pBuffer[i + 2]); + + aBuf.append("===="); + + sal_uInt8 nIndex (static_cast<sal_uInt8>((nBinary & 0xFC0000) >> 18)); + aBuf[nBufPos] = aBase64EncodeTable [nIndex]; + + nIndex = static_cast<sal_uInt8>((nBinary & 0x3F000) >> 12); + aBuf[nBufPos+1] = aBase64EncodeTable [nIndex]; + + nIndex = static_cast<sal_uInt8>((nBinary & 0xFC0) >> 6); + aBuf[nBufPos+2] = aBase64EncodeTable [nIndex]; + + nIndex = static_cast<sal_uInt8>((nBinary & 0x3F)); + aBuf[nBufPos+3] = aBase64EncodeTable [nIndex]; + } + if( nRemain > 0 ) + { + 
aBuf.append("===="); + sal_Int32 nBinary( 0 ); + const sal_Int32 nStart(i_nBufferLength-nRemain); + switch(nRemain) + { + case 1: nBinary = static_cast<sal_uInt8>(i_pBuffer[nStart + 0]) << 16; + break; + case 2: nBinary = (static_cast<sal_uInt8>(i_pBuffer[nStart + 0]) << 16) + + (static_cast<sal_uInt8>(i_pBuffer[nStart + 1]) << 8); + break; + } + sal_uInt8 nIndex (static_cast<sal_uInt8>((nBinary & 0xFC0000) >> 18)); + aBuf[nBufPos] = aBase64EncodeTable [nIndex]; + + nIndex = static_cast<sal_uInt8>((nBinary & 0x3F000) >> 12); + aBuf[nBufPos+1] = aBase64EncodeTable [nIndex]; + + if( nRemain == 2 ) + { + nIndex = static_cast<sal_uInt8>((nBinary & 0xFC0) >> 6); + aBuf[nBufPos+2] = aBase64EncodeTable [nIndex]; + } + } + + return aBuf.makeStringAndClear(); +} + +} // namespace + +ImageContainer::ImageContainer() +{} + +ImageId ImageContainer::addImage( const uno::Sequence<beans::PropertyValue>& xBitmap ) +{ + m_aImages.push_back( xBitmap ); + return m_aImages.size()-1; +} + +void ImageContainer::writeBase64EncodedStream( ImageId nId, EmitContext& rContext ) +{ + OSL_ASSERT( nId >= 0 && o3tl::make_unsigned(nId) < m_aImages.size() ); + + const uno::Sequence<beans::PropertyValue>& rEntry( m_aImages[nId] ); + + // find "InputSequence" property + const beans::PropertyValue* pAry(rEntry.getConstArray()); + const sal_Int32 nLen(rEntry.getLength()); + const beans::PropertyValue* pValue( + std::find_if(pAry, pAry+nLen, + [] (beans::PropertyValue const& v) -> bool { + return v.Name == "InputSequence"; + })); + + if (pValue == pAry + nLen ) + { + SAL_WARN("sdext.pdfimport", "InputSequence not found"); + return; + } + + uno::Sequence<sal_Int8> aData; + if( !(pValue->Value >>= aData) ) + { + SAL_WARN("sdext.pdfimport", "Wrong data type"); + return; + } + + rContext.rEmitter.write( encodeBase64( aData.getConstArray(), aData.getLength() )); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.cxx 
b/sdext/source/pdfimport/tree/pdfiprocessor.cxx new file mode 100644 index 000000000..23a2cd5a0 --- /dev/null +++ b/sdext/source/pdfimport/tree/pdfiprocessor.cxx @@ -0,0 +1,715 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +#include <pdfiprocessor.hxx> +#include <xmlemitter.hxx> +#include <pdfihelper.hxx> +#include <imagecontainer.hxx> +#include <genericelements.hxx> +#include "style.hxx" +#include <treevisiting.hxx> + +#include <sal/log.hxx> + +#include <comphelper/sequence.hxx> +#include <basegfx/polygon/b2dpolygonclipper.hxx> +#include <basegfx/polygon/b2dpolygontools.hxx> +#include <basegfx/utils/canvastools.hxx> +#include <basegfx/matrix/b2dhommatrix.hxx> +#include <i18nutil/unicode.hxx> + +using namespace com::sun::star; + + +namespace pdfi +{ + + PDFIProcessor::PDFIProcessor( const uno::Reference< task::XStatusIndicator >& xStat , + css::uno::Reference< css::uno::XComponentContext > const & xContext) : + + m_xContext(xContext), + prevCharWidth(0), + m_pDocument( ElementFactory::createDocumentElement() ), + m_pCurPage(nullptr), + m_pCurElement(nullptr), + m_nNextFontId( 1 ), + m_nNextGCId( 1 ), + m_nPages(0), + m_nNextZOrder( 1 ), + m_xStatusIndicator( xStat ) +{ + FontAttributes aDefFont; + aDefFont.familyName = "Helvetica"; + aDefFont.fontWeight = u"normal"; + aDefFont.isItalic = false; + aDefFont.size = 10*PDFI_OUTDEV_RESOLUTION/72; + m_aIdToFont.insert({0, aDefFont}); + m_aFontToId.insert({aDefFont, 0}); + + GraphicsContext aDefGC; + m_aGCStack.push_back( aDefGC ); + m_aGCToId.insert({aDefGC, 0}); + m_aIdToGC.insert({0, aDefGC}); +} + +void PDFIProcessor::setPageNum( sal_Int32 nPages ) +{ + m_nPages = nPages; +} + + +void PDFIProcessor::pushState() +{ + GraphicsContextStack::value_type const a(m_aGCStack.back()); + m_aGCStack.push_back(a); +} + +void PDFIProcessor::popState() +{ + m_aGCStack.pop_back(); +} + +void PDFIProcessor::setFlatness( double value ) +{ + getCurrentContext().Flatness = value; +} + +void PDFIProcessor::setTransformation( const geometry::AffineMatrix2D& rMatrix ) +{ + basegfx::unotools::homMatrixFromAffineMatrix( + getCurrentContext().Transformation, + rMatrix ); +} + +void PDFIProcessor::setLineDash( const uno::Sequence<double>& dashes, + 
double /*start*/ ) +{ + // TODO(F2): factor in start offset + GraphicsContext& rContext( getCurrentContext() ); + comphelper::sequenceToContainer(rContext.DashArray,dashes); +} + +void PDFIProcessor::setLineJoin(sal_Int8 nJoin) +{ + getCurrentContext().LineJoin = nJoin; +} + +void PDFIProcessor::setLineCap(sal_Int8 nCap) +{ + getCurrentContext().LineCap = nCap; +} + +void PDFIProcessor::setMiterLimit(double) +{ + SAL_WARN("sdext.pdfimport", "PDFIProcessor::setMiterLimit(): not supported by ODF"); +} + +void PDFIProcessor::setLineWidth(double nWidth) +{ + getCurrentContext().LineWidth = nWidth; +} + +void PDFIProcessor::setFillColor( const rendering::ARGBColor& rColor ) +{ + getCurrentContext().FillColor = rColor; +} + +void PDFIProcessor::setStrokeColor( const rendering::ARGBColor& rColor ) +{ + getCurrentContext().LineColor = rColor; +} + +void PDFIProcessor::setFont( const FontAttributes& i_rFont ) +{ + FontAttributes aChangedFont( i_rFont ); + GraphicsContext& rGC=getCurrentContext(); + // for text render modes, please see PDF reference manual + if (rGC.TextRenderMode == 1) + { + aChangedFont.isOutline = true; + } + else if (rGC.TextRenderMode == 2) + { + // tdf#81484: faux bold is represented as "stroke+fill" (while using the same color for both stroke and fill) in pdf. + // Convert to bold instead if the stroke color is the same as the fill color, + // otherwise it should be outline. 
+ if (getCurrentContext().LineColor == getCurrentContext().FillColor) + aChangedFont.fontWeight = u"bold"; + else + aChangedFont.isOutline = true; + } + FontToIdMap::const_iterator it = m_aFontToId.find( aChangedFont ); + if( it != m_aFontToId.end() ) + rGC.FontId = it->second; + else + { + m_aFontToId[ aChangedFont ] = m_nNextFontId; + m_aIdToFont[ m_nNextFontId ] = aChangedFont; + rGC.FontId = m_nNextFontId; + m_nNextFontId++; + } +} + +void PDFIProcessor::setTextRenderMode( sal_Int32 i_nMode ) +{ + GraphicsContext& rGC=getCurrentContext(); + rGC.TextRenderMode = i_nMode; + IdToFontMap::iterator it = m_aIdToFont.find( rGC.FontId ); + if( it != m_aIdToFont.end() ) + setFont( it->second ); +} + +sal_Int32 PDFIProcessor::getFontId( const FontAttributes& rAttr ) const +{ + const sal_Int32 nCurFont = getCurrentContext().FontId; + const_cast<PDFIProcessor*>(this)->setFont( rAttr ); + const sal_Int32 nFont = getCurrentContext().FontId; + const_cast<PDFIProcessor*>(this)->getCurrentContext().FontId = nCurFont; + + return nFont; +} + +// line diagnose block - start +void PDFIProcessor::processGlyphLine() +{ + if (m_GlyphsList.empty()) + return; + + double spaceDetectBoundary = 0.0; + + // Try to find space glyph and its width + for (CharGlyph & i : m_GlyphsList) + { + OUString& glyph = i.getGlyph(); + + sal_Unicode ch = '\0'; + if (!glyph.isEmpty()) + ch = glyph[0]; + + if ((ch == 0x20) || (ch == 0xa0)) + { + double spaceWidth = i.getWidth(); + spaceDetectBoundary = spaceWidth * 0.5; + break; + } + } + + // If space glyph is not found, use average glyph width instead + if (spaceDetectBoundary == 0.0) + { + double avgGlyphWidth = 0.0; + for (const CharGlyph & i : m_GlyphsList) + avgGlyphWidth += i.getWidth(); + avgGlyphWidth /= m_GlyphsList.size(); + spaceDetectBoundary = avgGlyphWidth * 0.2; + } + + FrameElement* frame = ElementFactory::createFrameElement( + m_GlyphsList[0].getCurElement(), + getGCId(m_GlyphsList[0].getGC())); + frame->ZOrder = m_nNextZOrder++; + 
frame->IsForText = true; + frame->FontSize = getFont(m_GlyphsList[0].getGC().FontId).size; + ParagraphElement* para = ElementFactory::createParagraphElement(frame); + + for (size_t i = 0; i < m_GlyphsList.size(); i++) + { + bool prependSpace = false; + TextElement* text = ElementFactory::createTextElement( + para, + getGCId(m_GlyphsList[i].getGC()), + m_GlyphsList[i].getGC().FontId); + if (i == 0) + { + text->x = m_GlyphsList[0].getGC().Transformation.get(0, 2); + text->y = m_GlyphsList[0].getGC().Transformation.get(1, 2); + text->w = 0; + text->h = 0; + para->updateGeometryWith(text); + frame->updateGeometryWith(para); + } + else + { + double spaceSize = m_GlyphsList[i].getPrevSpaceWidth(); + prependSpace = spaceSize > spaceDetectBoundary; + } + if (prependSpace) + text->Text.append(" "); + text->Text.append(m_GlyphsList[i].getGlyph()); + } + + m_GlyphsList.clear(); +} + +void PDFIProcessor::drawGlyphs( const OUString& rGlyphs, + const geometry::RealRectangle2D& rRect, + const geometry::Matrix2D& rFontMatrix, + double fontSize) +{ + double ascent = getFont(getCurrentContext().FontId).ascent; + + basegfx::B2DHomMatrix fontMatrix( + rFontMatrix.m00, rFontMatrix.m01, 0.0, + rFontMatrix.m10, rFontMatrix.m11, 0.0); + fontMatrix.scale(fontSize, fontSize); + + basegfx::B2DHomMatrix totalTextMatrix1(fontMatrix); + basegfx::B2DHomMatrix totalTextMatrix2(fontMatrix); + totalTextMatrix1.translate(rRect.X1, rRect.Y1); + totalTextMatrix2.translate(rRect.X2, rRect.Y2); + + basegfx::B2DHomMatrix corrMatrix; + corrMatrix.scale(1.0, -1.0); + corrMatrix.translate(0.0, ascent); + totalTextMatrix1 = totalTextMatrix1 * corrMatrix; + totalTextMatrix2 = totalTextMatrix2 * corrMatrix; + + totalTextMatrix1 *= getCurrentContext().Transformation; + totalTextMatrix2 *= getCurrentContext().Transformation; + + basegfx::B2DHomMatrix invMatrix(totalTextMatrix1); + basegfx::B2DHomMatrix invPrevMatrix(prevTextMatrix); + invMatrix.invert(); + invPrevMatrix.invert(); + basegfx::B2DHomMatrix 
offsetMatrix1(totalTextMatrix1); + basegfx::B2DHomMatrix offsetMatrix2(totalTextMatrix2); + offsetMatrix1 *= invPrevMatrix; + offsetMatrix2 *= invMatrix; + + double charWidth = offsetMatrix2.get(0, 2); + double prevSpaceWidth = offsetMatrix1.get(0, 2) - prevCharWidth; + + if ((totalTextMatrix1.get(0, 0) != prevTextMatrix.get(0, 0)) || + (totalTextMatrix1.get(0, 1) != prevTextMatrix.get(0, 1)) || + (totalTextMatrix1.get(1, 0) != prevTextMatrix.get(1, 0)) || + (totalTextMatrix1.get(1, 1) != prevTextMatrix.get(1, 1)) || + (offsetMatrix1.get(0, 2) < 0.0) || + (prevSpaceWidth > prevCharWidth * 1.3) || + (!basegfx::fTools::equalZero(offsetMatrix1.get(1, 2), 0.0001))) + { + processGlyphLine(); + } + + CharGlyph aGlyph(m_pCurElement, getCurrentContext(), charWidth, prevSpaceWidth, rGlyphs); + aGlyph.getGC().Transformation = totalTextMatrix1; + m_GlyphsList.push_back(aGlyph); + + prevCharWidth = charWidth; + prevTextMatrix = totalTextMatrix1; +} + +void PDFIProcessor::endText() +{ + TextElement* pText = dynamic_cast<TextElement*>(m_pCurElement); + if( pText ) + m_pCurElement = pText->Parent; +} + +void PDFIProcessor::setupImage(ImageId nImage) +{ + const GraphicsContext& rGC(getCurrentContext()); + + basegfx::B2DTuple aScale, aTranslation; + double fRotate, fShearX; + rGC.Transformation.decompose(aScale, aTranslation, fRotate, fShearX); + + const sal_Int32 nGCId = getGCId(rGC); + FrameElement* pFrame = ElementFactory::createFrameElement( m_pCurElement, nGCId ); + ImageElement* pImageElement = ElementFactory::createImageElement( pFrame, nGCId, nImage ); + pFrame->x = pImageElement->x = aTranslation.getX(); + pFrame->y = pImageElement->y = aTranslation.getY(); + pFrame->w = pImageElement->w = aScale.getX(); + pFrame->h = pImageElement->h = aScale.getY(); + pFrame->ZOrder = m_nNextZOrder++; + + // Poppler wrapper takes into account that vertical axes of PDF and ODF are opposite, + // and it flips matrix vertically (see poppler's GfxState::GfxState()). 
+ // But image internal vertical axis is independent of PDF vertical axis direction, + // so arriving matrix is extra-flipped relative to image. + // We force vertical flip here to compensate that. + pFrame->MirrorVertical = true; +} + +void PDFIProcessor::drawMask(const uno::Sequence<beans::PropertyValue>& xBitmap, + bool /*bInvert*/ ) +{ + // TODO(F3): Handle mask and inversion + setupImage( m_aImages.addImage(xBitmap) ); +} + +void PDFIProcessor::drawImage(const uno::Sequence<beans::PropertyValue>& xBitmap ) +{ + setupImage( m_aImages.addImage(xBitmap) ); +} + +void PDFIProcessor::drawColorMaskedImage(const uno::Sequence<beans::PropertyValue>& xBitmap, + const uno::Sequence<uno::Any>& /*xMaskColors*/ ) +{ + // TODO(F3): Handle mask colors + setupImage( m_aImages.addImage(xBitmap) ); +} + +void PDFIProcessor::drawMaskedImage(const uno::Sequence<beans::PropertyValue>& xBitmap, + const uno::Sequence<beans::PropertyValue>& /*xMask*/, + bool /*bInvertMask*/) +{ + // TODO(F3): Handle mask and inversion + setupImage( m_aImages.addImage(xBitmap) ); +} + +void PDFIProcessor::drawAlphaMaskedImage(const uno::Sequence<beans::PropertyValue>& xBitmap, + const uno::Sequence<beans::PropertyValue>& /*xMask*/) +{ + // TODO(F3): Handle mask + + setupImage( m_aImages.addImage(xBitmap) ); + +} + +void PDFIProcessor::strokePath( const uno::Reference< rendering::XPolyPolygon2D >& rPath ) +{ + basegfx::B2DPolyPolygon aPoly=basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath); + aPoly.transform(getCurrentContext().Transformation); + + PolyPolyElement* pPoly = ElementFactory::createPolyPolyElement( + m_pCurElement, + getGCId(getCurrentContext()), + aPoly, + PATH_STROKE ); + pPoly->updateGeometry(); + pPoly->ZOrder = m_nNextZOrder++; +} + +void PDFIProcessor::fillPath( const uno::Reference< rendering::XPolyPolygon2D >& rPath ) +{ + basegfx::B2DPolyPolygon aPoly=basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath); + aPoly.transform(getCurrentContext().Transformation); + + 
PolyPolyElement* pPoly = ElementFactory::createPolyPolyElement( + m_pCurElement, + getGCId(getCurrentContext()), + aPoly, + PATH_FILL ); + pPoly->updateGeometry(); + pPoly->ZOrder = m_nNextZOrder++; +} + +void PDFIProcessor::eoFillPath( const uno::Reference< rendering::XPolyPolygon2D >& rPath ) +{ + basegfx::B2DPolyPolygon aPoly=basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath); + aPoly.transform(getCurrentContext().Transformation); + + PolyPolyElement* pPoly = ElementFactory::createPolyPolyElement( + m_pCurElement, + getGCId(getCurrentContext()), + aPoly, + PATH_EOFILL ); + pPoly->updateGeometry(); + pPoly->ZOrder = m_nNextZOrder++; +} + +void PDFIProcessor::intersectClip(const uno::Reference< rendering::XPolyPolygon2D >& rPath) +{ + // TODO(F3): interpret fill mode + basegfx::B2DPolyPolygon aNewClip = basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath); + aNewClip.transform(getCurrentContext().Transformation); + basegfx::B2DPolyPolygon aCurClip = getCurrentContext().Clip; + + if( aCurClip.count() ) // #i92985# adapted API from (..., false, false) to (..., true, false) + aNewClip = basegfx::utils::clipPolyPolygonOnPolyPolygon( aCurClip, aNewClip, true, false ); + + getCurrentContext().Clip = aNewClip; +} + +void PDFIProcessor::intersectEoClip(const uno::Reference< rendering::XPolyPolygon2D >& rPath) +{ + // TODO(F3): interpret fill mode + basegfx::B2DPolyPolygon aNewClip = basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath); + aNewClip.transform(getCurrentContext().Transformation); + basegfx::B2DPolyPolygon aCurClip = getCurrentContext().Clip; + + if( aCurClip.count() ) // #i92985# adapted API from (..., false, false) to (..., true, false) + aNewClip = basegfx::utils::clipPolyPolygonOnPolyPolygon( aCurClip, aNewClip, true, false ); + + getCurrentContext().Clip = aNewClip; +} + +void PDFIProcessor::hyperLink( const geometry::RealRectangle2D& rBounds, + const OUString& rURI ) +{ + if( !rURI.isEmpty() ) + { + HyperlinkElement* pLink = 
ElementFactory::createHyperlinkElement( + &m_pCurPage->Hyperlinks, + rURI ); + pLink->x = rBounds.X1; + pLink->y = rBounds.Y1; + pLink->w = rBounds.X2-rBounds.X1; + pLink->h = rBounds.Y2-rBounds.Y1; + } +} + +const FontAttributes& PDFIProcessor::getFont( sal_Int32 nFontId ) const +{ + IdToFontMap::const_iterator it = m_aIdToFont.find( nFontId ); + if( it == m_aIdToFont.end() ) + it = m_aIdToFont.find( 0 ); + return it->second; +} + +sal_Int32 PDFIProcessor::getGCId( const GraphicsContext& rGC ) +{ + sal_Int32 nGCId = 0; + auto it = m_aGCToId.find( rGC ); + if( it != m_aGCToId.end() ) + nGCId = it->second; + else + { + m_aGCToId.insert({rGC, m_nNextGCId}); + m_aIdToGC.insert({m_nNextGCId, rGC}); + nGCId = m_nNextGCId; + m_nNextGCId++; + } + + return nGCId; +} + +const GraphicsContext& PDFIProcessor::getGraphicsContext( sal_Int32 nGCId ) const +{ + auto it = m_aIdToGC.find( nGCId ); + if( it == m_aIdToGC.end() ) + it = m_aIdToGC.find( 0 ); + return it->second; +} + +void PDFIProcessor::endPage() +{ + processGlyphLine(); // draw last line + if( m_xStatusIndicator.is() + && m_pCurPage + && m_pCurPage->PageNumber == m_nPages + ) + m_xStatusIndicator->end(); +} + +void PDFIProcessor::startPage( const geometry::RealSize2D& rSize ) +{ + // initial clip is to page bounds + getCurrentContext().Clip = basegfx::B2DPolyPolygon( + basegfx::utils::createPolygonFromRect( + basegfx::B2DRange( 0, 0, rSize.Width, rSize.Height ))); + + sal_Int32 nNextPageNr = m_pCurPage ? 
m_pCurPage->PageNumber+1 : 1; + if( m_xStatusIndicator.is() ) + { + if( nNextPageNr == 1 ) + startIndicator( " " ); + m_xStatusIndicator->setValue( nNextPageNr ); + } + m_pCurPage = ElementFactory::createPageElement(m_pDocument.get(), nNextPageNr); + m_pCurElement = m_pCurPage; + m_pCurPage->w = rSize.Width; + m_pCurPage->h = rSize.Height; + m_nNextZOrder = 1; + + +} + +void PDFIProcessor::emit( XmlEmitter& rEmitter, + const TreeVisitorFactory& rVisitorFactory ) +{ +#if OSL_DEBUG_LEVEL > 0 + m_pDocument->emitStructure( 0 ); +#endif + + ElementTreeVisitorSharedPtr optimizingVisitor( + rVisitorFactory.createOptimizingVisitor(*this)); + // FIXME: localization + startIndicator( " " ); + m_pDocument->visitedBy( *optimizingVisitor, std::list<std::unique_ptr<Element>>::const_iterator()); + +#if OSL_DEBUG_LEVEL > 0 + m_pDocument->emitStructure( 0 ); +#endif + + // get styles + StyleContainer aStyles; + ElementTreeVisitorSharedPtr finalizingVisitor( + rVisitorFactory.createStyleCollectingVisitor(aStyles,*this)); + // FIXME: localization + + m_pDocument->visitedBy( *finalizingVisitor, std::list<std::unique_ptr<Element>>::const_iterator() ); + + EmitContext aContext( rEmitter, aStyles, m_aImages, *this, m_xStatusIndicator, m_xContext ); + ElementTreeVisitorSharedPtr aEmittingVisitor( + rVisitorFactory.createEmittingVisitor(aContext)); + + PropertyMap aProps; + // document prolog + #define OASIS_STR "urn:oasis:names:tc:opendocument:xmlns:" + aProps[ "xmlns:office" ] = OASIS_STR "office:1.0" ; + aProps[ "xmlns:style" ] = OASIS_STR "style:1.0" ; + aProps[ "xmlns:text" ] = OASIS_STR "text:1.0" ; + aProps[ "xmlns:svg" ] = OASIS_STR "svg-compatible:1.0" ; + aProps[ "xmlns:table" ] = OASIS_STR "table:1.0" ; + aProps[ "xmlns:draw" ] = OASIS_STR "drawing:1.0" ; + aProps[ "xmlns:fo" ] = OASIS_STR "xsl-fo-compatible:1.0" ; + aProps[ "xmlns:xlink"] = "http://www.w3.org/1999/xlink"; + aProps[ "xmlns:dc"] = "http://purl.org/dc/elements/1.1/"; + aProps[ "xmlns:number"] = OASIS_STR 
"datastyle:1.0" ; + aProps[ "xmlns:presentation"] = OASIS_STR "presentation:1.0" ; + aProps[ "xmlns:math"] = "http://www.w3.org/1998/Math/MathML"; + aProps[ "xmlns:form"] = OASIS_STR "form:1.0" ; + aProps[ "xmlns:script"] = OASIS_STR "script:1.0" ; + aProps[ "xmlns:dom"] = "http://www.w3.org/2001/xml-events"; + aProps[ "xmlns:xforms"] = "http://www.w3.org/2002/xforms"; + aProps[ "xmlns:xsd"] = "http://www.w3.org/2001/XMLSchema"; + aProps[ "xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"; + aProps[ "office:version" ] = "1.0"; + + aContext.rEmitter.beginTag( "office:document", aProps ); + + // emit style list + aStyles.emit( aContext, *aEmittingVisitor ); + + m_pDocument->visitedBy( *aEmittingVisitor, std::list<std::unique_ptr<Element>>::const_iterator() ); + aContext.rEmitter.endTag( "office:document" ); + endIndicator(); +} + +void PDFIProcessor::startIndicator( const OUString& rText ) +{ + sal_Int32 nElements = m_nPages; + if( !m_xStatusIndicator.is() ) + return; + + sal_Int32 nLength = rText.getLength(); + OUStringBuffer aStr( nLength*2 ); + const sal_Unicode* pText = rText.getStr(); + for( int i = 0; i < nLength; i++ ) + { + if( nLength-i > 1&& + pText[i] == '%' && + pText[i+1] == 'd' + ) + { + aStr.append( nElements ); + i++; + } + else + aStr.append( pText[i] ); + } + m_xStatusIndicator->start( aStr.makeStringAndClear(), nElements ); +} + +void PDFIProcessor::endIndicator() +{ + if( m_xStatusIndicator.is() ) + m_xStatusIndicator->end(); +} + +static bool lr_tb_sort( std::unique_ptr<Element> const & pLeft, std::unique_ptr<Element> const & pRight ) +{ + // Ensure irreflexivity (which could be compromised if h or w is negative): + if (pLeft == pRight) + return false; + + // first: top-bottom sorting + + // Note: allow for 10% overlap on text lines since text lines are usually + // of the same order as font height whereas the real paint area + // of text is usually smaller + double fudge_factor_left = 0.0, fudge_factor_right = 0.0; + if( dynamic_cast< 
TextElement* >(pLeft.get()) ) + fudge_factor_left = 0.1; + if (dynamic_cast< TextElement* >(pRight.get())) + fudge_factor_right = 0.1; + + // Allow negative height + double lower_boundary_left = pLeft->y + std::max(pLeft->h, 0.0) - fabs(pLeft->h) * fudge_factor_left; + double lower_boundary_right = pRight->y + std::max(pRight->h, 0.0) - fabs(pRight->h) * fudge_factor_right; + double upper_boundary_left = pLeft->y + std::min(pLeft->h, 0.0); + double upper_boundary_right = pRight->y + std::min(pRight->h, 0.0); + // if left's lower boundary is above right's upper boundary + // then left is smaller + if( lower_boundary_left < upper_boundary_right ) + return true; + // if right's lower boundary is above left's upper boundary + // then left is definitely not smaller + if( lower_boundary_right < upper_boundary_left ) + return false; + + // Allow negative width + double left_boundary_left = pLeft->y + std::min(pLeft->w, 0.0); + double left_boundary_right = pRight->y + std::min(pRight->w, 0.0); + double right_boundary_left = pLeft->y + std::max(pLeft->w, 0.0); + double right_boundary_right = pRight->y + std::max(pRight->w, 0.0); + // by now we have established that left and right are inside + // a "line", that is they have vertical overlap + // second: left-right sorting + // if left's right boundary is left to right's left boundary + // then left is smaller + if( right_boundary_left < left_boundary_right ) + return true; + // if right's right boundary is left to left's left boundary + // then left is definitely not smaller + if( right_boundary_right < left_boundary_left ) + return false; + + // here we have established vertical and horizontal overlap + // so sort left first, top second + if( pLeft->x < pRight->x ) + return true; + if( pRight->x < pLeft->x ) + return false; + if( pLeft->y < pRight->y ) + return true; + + return false; +} + +void PDFIProcessor::sortElements(Element* pEle) +{ + if( pEle->Children.empty() ) + return; + + // sort method from std::list is 
equivalent to stable_sort + // See S Meyers, Effective STL + pEle->Children.sort(lr_tb_sort); +} + +/* Produce mirrored-image for each code point which has the Bidi_Mirrored property, within a string. + This need to be done in forward order. +*/ +OUString PDFIProcessor::SubstituteBidiMirrored(const OUString& rString) +{ + const sal_Int32 nLen = rString.getLength(); + OUStringBuffer aMirror(nLen); + + for (sal_Int32 i = 0; i < nLen;) { + const sal_uInt32 nCodePoint = rString.iterateCodePoints(&i); + aMirror.appendUtf32(unicode::GetMirroredChar(nCodePoint)); + } + return aMirror.makeStringAndClear(); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/style.cxx b/sdext/source/pdfimport/tree/style.cxx new file mode 100644 index 000000000..b1cb02a48 --- /dev/null +++ b/sdext/source/pdfimport/tree/style.cxx @@ -0,0 +1,248 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +#include "style.hxx" +#include <genericelements.hxx> +#include <xmlemitter.hxx> +#include <rtl/ustrbuf.hxx> + +#include <algorithm> +#include <string_view> + +using namespace pdfi; + + +StyleContainer::StyleContainer() : + m_nNextId( 1 ) +{ +} + +sal_Int32 StyleContainer::impl_getStyleId( const Style& rStyle, bool bSubStyle ) +{ + sal_Int32 nRet = -1; + + // construct HashedStyle to find or insert + HashedStyle aSearchStyle; + aSearchStyle.Name = rStyle.Name; + aSearchStyle.Properties = rStyle.Properties; + aSearchStyle.Contents = rStyle.Contents; + aSearchStyle.ContainedElement = rStyle.ContainedElement; + for(Style* pSubStyle : rStyle.SubStyles) + aSearchStyle.SubStyles.push_back( impl_getStyleId( *pSubStyle, true ) ); + + std::unordered_map< HashedStyle, sal_Int32, StyleHash >::iterator it = + m_aStyleToId.find( aSearchStyle ); + + if( it != m_aStyleToId.end() ) + { + nRet = it->second; + RefCountedHashedStyle& rFound = m_aIdToStyle[ nRet ]; + // increase refcount on this style + rFound.RefCount++; + if( ! bSubStyle ) + rFound.style.IsSubStyle = false; + } + else + { + nRet = m_nNextId++; + // create new style + RefCountedHashedStyle& rNew = m_aIdToStyle[ nRet ]; + rNew.style = aSearchStyle; + rNew.RefCount = 1; + rNew.style.IsSubStyle = bSubStyle; + // fill the style hash to find the id + m_aStyleToId[ rNew.style ] = nRet; + } + return nRet; +} + +sal_Int32 StyleContainer::getStandardStyleId( std::string_view rName ) +{ + PropertyMap aProps; + aProps[ "style:family" ] = OStringToOUString( rName, RTL_TEXTENCODING_UTF8 ); + aProps[ "style:name" ] = "standard"; + + Style aStyle( "style:style", std::move(aProps) ); + return getStyleId( aStyle ); +} + +const PropertyMap* StyleContainer::getProperties( sal_Int32 nStyleId ) const +{ + std::unordered_map< sal_Int32, RefCountedHashedStyle >::const_iterator it = + m_aIdToStyle.find( nStyleId ); + return it != m_aIdToStyle.end() ? 
&(it->second.style.Properties) : nullptr; +} + +sal_Int32 StyleContainer::setProperties( sal_Int32 nStyleId, PropertyMap&& rNewProps ) +{ + sal_Int32 nRet = -1; + std::unordered_map< sal_Int32, RefCountedHashedStyle >::iterator it = + m_aIdToStyle.find( nStyleId ); + if( it != m_aIdToStyle.end() ) + { + if( it->second.RefCount == 1 ) + { + nRet = it->first; + // erase old hash to id mapping + m_aStyleToId.erase( it->second.style ); + // change properties + it->second.style.Properties = std::move(rNewProps); + // fill in new hash to id mapping + m_aStyleToId[ it->second.style ] = nRet; + } + else + { + // decrease refcount on old instance + it->second.RefCount--; + // acquire new HashedStyle + HashedStyle aSearchStyle; + aSearchStyle.Name = it->second.style.Name; + aSearchStyle.Properties = std::move(rNewProps); + aSearchStyle.Contents = it->second.style.Contents; + aSearchStyle.ContainedElement = it->second.style.ContainedElement; + aSearchStyle.SubStyles = it->second.style.SubStyles; + aSearchStyle.IsSubStyle = it->second.style.IsSubStyle; + + // find out whether this new style already exists + std::unordered_map< HashedStyle, sal_Int32, StyleHash >::iterator new_it = + m_aStyleToId.find( aSearchStyle ); + if( new_it != m_aStyleToId.end() ) + { + nRet = new_it->second; + m_aIdToStyle[ nRet ].RefCount++; + } + else + { + nRet = m_nNextId++; + // create new style with new id + RefCountedHashedStyle& rNew = m_aIdToStyle[ nRet ]; + rNew.style = aSearchStyle; + rNew.RefCount = 1; + // fill style to id hash + m_aStyleToId[ aSearchStyle ] = nRet; + } + } + } + return nRet; +} + +OUString StyleContainer::getStyleName( sal_Int32 nStyle ) const +{ + OUStringBuffer aRet( 64 ); + + std::unordered_map< sal_Int32, RefCountedHashedStyle >::const_iterator style_it = + m_aIdToStyle.find( nStyle ); + if( style_it != m_aIdToStyle.end() ) + { + const HashedStyle& rStyle = style_it->second.style; + + PropertyMap::const_iterator name_it = rStyle.Properties.find( "style:name" ); + if( 
name_it != rStyle.Properties.end() ) + aRet.append( name_it->second ); + else + { + PropertyMap::const_iterator fam_it = rStyle.Properties.find( "style:family" ); + OUString aStyleName; + if( fam_it != rStyle.Properties.end() ) + { + aStyleName = fam_it->second; + } + else + aStyleName = OStringToOUString( rStyle.Name, RTL_TEXTENCODING_ASCII_US ); + sal_Int32 nIndex = aStyleName.lastIndexOf( ':' ); + aRet.append( aStyleName.subView(nIndex+1) ); + aRet.append( nStyle ); + } + } + else + { + aRet.append( "invalid style id " ); + aRet.append( nStyle ); + } + + return aRet.makeStringAndClear(); +} + +void StyleContainer::impl_emitStyle( sal_Int32 nStyleId, + EmitContext& rContext, + ElementTreeVisitor& rContainedElemVisitor ) +{ + std::unordered_map< sal_Int32, RefCountedHashedStyle >::const_iterator it = m_aIdToStyle.find( nStyleId ); + if( it == m_aIdToStyle.end() ) + return; + + const HashedStyle& rStyle = it->second.style; + PropertyMap aProps( rStyle.Properties ); + if( !rStyle.IsSubStyle ) + aProps[ "style:name" ] = getStyleName( nStyleId ); + if (rStyle.Name == "draw:stroke-dash") + aProps[ "draw:name" ] = aProps[ "style:name" ]; + rContext.rEmitter.beginTag( rStyle.Name.getStr(), aProps ); + + for(sal_Int32 nSubStyle : rStyle.SubStyles) + impl_emitStyle( nSubStyle, rContext, rContainedElemVisitor ); + if( !rStyle.Contents.isEmpty() ) + rContext.rEmitter.write( rStyle.Contents ); + if( rStyle.ContainedElement ) + rStyle.ContainedElement->visitedBy( rContainedElemVisitor, + std::list<std::unique_ptr<Element>>::iterator() ); + rContext.rEmitter.endTag( rStyle.Name.getStr() ); +} + +void StyleContainer::emit( EmitContext& rContext, + ElementTreeVisitor& rContainedElemVisitor ) +{ + std::vector< sal_Int32 > aMasterPageSection, aAutomaticStyleSection, aOfficeStyleSection; + for( const auto& rEntry : m_aIdToStyle ) + { + if( ! 
rEntry.second.style.IsSubStyle ) + { + if( rEntry.second.style.Name == "style:master-page" ) + aMasterPageSection.push_back( rEntry.first ); + else if( getStyleName( rEntry.first ) == "standard" ) + aOfficeStyleSection.push_back( rEntry.first ); + else + aAutomaticStyleSection.push_back( rEntry.first ); + } + } + + if( ! aMasterPageSection.empty() ) + std::stable_sort( aMasterPageSection.begin(), aMasterPageSection.end(), StyleIdNameSort(&m_aIdToStyle) ); + if( ! aAutomaticStyleSection.empty() ) + std::stable_sort( aAutomaticStyleSection.begin(), aAutomaticStyleSection.end(), StyleIdNameSort(&m_aIdToStyle) ); + if( ! aOfficeStyleSection.empty() ) + std::stable_sort( aOfficeStyleSection.begin(), aOfficeStyleSection.end(), StyleIdNameSort(&m_aIdToStyle) ); + + int n = 0, nElements = 0; + rContext.rEmitter.beginTag( "office:styles", PropertyMap() ); + for( n = 0, nElements = aOfficeStyleSection.size(); n < nElements; n++ ) + impl_emitStyle( aOfficeStyleSection[n], rContext, rContainedElemVisitor ); + rContext.rEmitter.endTag( "office:styles" ); + rContext.rEmitter.beginTag( "office:automatic-styles", PropertyMap() ); + for( n = 0, nElements = aAutomaticStyleSection.size(); n < nElements; n++ ) + impl_emitStyle( aAutomaticStyleSection[n], rContext, rContainedElemVisitor ); + rContext.rEmitter.endTag( "office:automatic-styles" ); + rContext.rEmitter.beginTag( "office:master-styles", PropertyMap() ); + for( n = 0, nElements = aMasterPageSection.size(); n < nElements; n++ ) + impl_emitStyle( aMasterPageSection[n], rContext, rContainedElemVisitor ); + rContext.rEmitter.endTag( "office:master-styles" ); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/style.hxx b/sdext/source/pdfimport/tree/style.hxx new file mode 100644 index 000000000..572be241e --- /dev/null +++ b/sdext/source/pdfimport/tree/style.hxx @@ -0,0 +1,166 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is 
part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_STYLE_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_STYLE_HXX + +#include <pdfihelper.hxx> +#include <numeric> +#include <string_view> +#include <unordered_map> +#include <vector> +#include <rtl/ustring.hxx> +#include <rtl/string.hxx> +#include <treevisiting.hxx> + +namespace pdfi +{ + struct Element; + struct EmitContext; + + class StyleContainer + { + public: + struct Style + { + OString Name; + PropertyMap Properties; + OUString Contents; + Element* ContainedElement; + std::vector< Style* > SubStyles; + + Style( const OString& rName, PropertyMap&& rProps ) : + Name( rName ), + Properties( std::move(rProps) ), + ContainedElement( nullptr ) + {} + }; + + private: + struct HashedStyle + { + OString Name; + PropertyMap Properties; + OUString Contents; + Element* ContainedElement; + std::vector<sal_Int32> SubStyles; + + bool IsSubStyle; + + HashedStyle() : ContainedElement( nullptr ), IsSubStyle( true ) {} + + size_t hashCode() const + { + size_t nRet = std::accumulate(Properties.begin(), Properties.end(), size_t(Name.hashCode()), + [](const size_t& sum, const PropertyMap::value_type& rEntry) { + return sum ^ 
size_t(rEntry.first.hashCode()) ^ size_t(rEntry.second.hashCode()); + }); + nRet ^= size_t(Contents.hashCode()); + nRet ^= reinterpret_cast<size_t>(ContainedElement); + for( size_t n = 0; n < SubStyles.size(); ++n ) + nRet ^= size_t(SubStyles[n]); + return nRet; + } + + bool operator==(const HashedStyle& rRight) const + { + if( Name != rRight.Name || + Properties != rRight.Properties || + Contents != rRight.Contents || + ContainedElement != rRight.ContainedElement || + SubStyles.size() != rRight.SubStyles.size() + ) + return false; + for( size_t n = 0; n < SubStyles.size(); ++n ) + { + if( SubStyles[n] != rRight.SubStyles[n] ) + return false; + } + return true; + } + }; + + struct RefCountedHashedStyle { + HashedStyle style; + sal_Int32 RefCount = 0; + }; + + struct StyleHash; + friend struct StyleHash; + struct StyleHash + { + size_t operator()( const StyleContainer::HashedStyle& rStyle ) const + { + return rStyle.hashCode(); + } + }; + + struct StyleIdNameSort; + friend struct StyleIdNameSort; + struct StyleIdNameSort + { + const std::unordered_map< sal_Int32, RefCountedHashedStyle >* m_pMap; + + explicit StyleIdNameSort( const std::unordered_map< sal_Int32, RefCountedHashedStyle >* pMap ) : + m_pMap(pMap) + {} + bool operator()( sal_Int32 nLeft, sal_Int32 nRight ) + { + const std::unordered_map< sal_Int32, RefCountedHashedStyle >::const_iterator left_it = + m_pMap->find( nLeft ); + const std::unordered_map< sal_Int32, RefCountedHashedStyle >::const_iterator right_it = + m_pMap->find( nRight ); + if( left_it == m_pMap->end() ) + return false; + else if( right_it == m_pMap->end() ) + return true; + else + return left_it->second.style.Name < right_it->second.style.Name; + } + }; + + sal_Int32 m_nNextId; + std::unordered_map< sal_Int32, RefCountedHashedStyle > m_aIdToStyle; + std::unordered_map< HashedStyle, sal_Int32, StyleHash > m_aStyleToId; + + void impl_emitStyle( sal_Int32 nStyleId, + EmitContext& rContext, + ElementTreeVisitor& rContainedElemVisitor ); + + 
public: + StyleContainer(); + + void emit( EmitContext& rContext, + ElementTreeVisitor& rContainedElemVisitor ); + + sal_Int32 impl_getStyleId( const Style& rStyle, bool bSubStyle ); + sal_Int32 getStyleId( const Style& rStyle ) + { return impl_getStyleId( rStyle, false ); } + sal_Int32 getStandardStyleId( std::string_view rFamily ); + + // returns NULL for an invalid style id + const PropertyMap* getProperties( sal_Int32 nStyleId ) const; + sal_Int32 setProperties( sal_Int32 nStyleId, PropertyMap&& rNewProps ); + OUString getStyleName( sal_Int32 nStyle ) const; + }; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/treevisitorfactory.cxx b/sdext/source/pdfimport/tree/treevisitorfactory.cxx new file mode 100644 index 000000000..495bf0bcb --- /dev/null +++ b/sdext/source/pdfimport/tree/treevisitorfactory.cxx @@ -0,0 +1,111 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +#include <treevisitorfactory.hxx> +#include "writertreevisiting.hxx" +#include "drawtreevisiting.hxx" + +namespace pdfi +{ + namespace { + + struct WriterTreeVisitorFactory : public TreeVisitorFactory + { + WriterTreeVisitorFactory() {} + + virtual std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor& rProc) const override + { + return std::make_shared<WriterXmlOptimizer>(rProc); + } + + virtual std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor( + StyleContainer& rStyles, + PDFIProcessor& rProc ) const override + { + return std::make_shared<WriterXmlFinalizer>(rStyles,rProc); + } + + virtual std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext& rEmitContext) const override + { + return std::make_shared<WriterXmlEmitter>(rEmitContext); + } + }; + + struct ImpressTreeVisitorFactory : public TreeVisitorFactory + { + ImpressTreeVisitorFactory() {} + + virtual std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor& rProc) const override + { + return std::make_shared<DrawXmlOptimizer>(rProc); + } + + virtual std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor( + StyleContainer& rStyles, + PDFIProcessor& rProc ) const override + { + return std::make_shared<DrawXmlFinalizer>(rStyles,rProc); + } + + virtual std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext& rEmitContext) const override + { + return std::make_shared<DrawXmlEmitter>(rEmitContext, DrawXmlEmitter::IMPRESS_DOC); + } + }; + + struct DrawTreeVisitorFactory : public TreeVisitorFactory + { + DrawTreeVisitorFactory() {} + + virtual std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor& rProc) const override + { + return std::make_shared<DrawXmlOptimizer>(rProc); + } + + virtual std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor( + StyleContainer& rStyles, + PDFIProcessor& rProc ) const override + { + return std::make_shared<DrawXmlFinalizer>(rStyles,rProc); + } + + 
virtual std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext& rEmitContext) const override + { + return std::make_shared<DrawXmlEmitter>(rEmitContext, DrawXmlEmitter::DRAW_DOC); + } + }; + + } + + TreeVisitorFactorySharedPtr createWriterTreeVisitorFactory() + { + return std::make_shared<WriterTreeVisitorFactory>(); + } + TreeVisitorFactorySharedPtr createImpressTreeVisitorFactory() + { + return std::make_shared<ImpressTreeVisitorFactory>(); + } + TreeVisitorFactorySharedPtr createDrawTreeVisitorFactory() + { + return std::make_shared<DrawTreeVisitorFactory>(); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx new file mode 100644 index 000000000..52f45ed57 --- /dev/null +++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx @@ -0,0 +1,1299 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> +#include <sal/log.hxx> +#include <string_view> + +#include <pdfiprocessor.hxx> +#include <xmlemitter.hxx> +#include <pdfihelper.hxx> +#include <imagecontainer.hxx> +#include "style.hxx" +#include "writertreevisiting.hxx" +#include <genericelements.hxx> + +#include <basegfx/polygon/b2dpolypolygontools.hxx> +#include <osl/diagnose.h> +#include <com/sun/star/i18n/CharacterClassification.hpp> +#include <com/sun/star/i18n/DirectionProperty.hpp> +#include <comphelper/string.hxx> + +using namespace ::com::sun::star; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::uno; + +namespace pdfi +{ + +const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification() +{ + if ( !mxCharClass.is() ) + { + Reference< XComponentContext > xContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW ); + mxCharClass = CharacterClassification::create(xContext); + } + return mxCharClass; +} + +void WriterXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( elem.Children.empty() ) + return; + + const char* pType = dynamic_cast<DrawElement*>(elem.Children.front().get()) ? 
"draw:a" : "text:a"; + + PropertyMap aProps; + aProps[ "xlink:type" ] = "simple"; + aProps[ "xlink:href" ] = elem.URI; + aProps[ "office:target-frame-name" ] = "_blank"; + aProps[ "xlink:show" ] = "new"; + + m_rEmitContext.rEmitter.beginTag( pType, aProps ); + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + m_rEmitContext.rEmitter.endTag( pType ); +} + +void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( elem.Text.isEmpty() ) + return; + + PropertyMap aProps = {}; + const sal_Unicode strSpace = 0x0020; + const sal_Unicode strNbSpace = 0x00A0; + const sal_Unicode tabSpace = 0x0009; + + if( elem.StyleId != -1 ) + { + aProps[ OUString( "text:style-name" ) ] = + m_rEmitContext.rStyles.getStyleName( elem.StyleId ); + } + + OUString str(elem.Text.toString()); + + // Check for RTL + bool isRTL = false; + Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() ); + if( xCC.is() ) + { + for(int i=1; i< elem.Text.getLength(); i++) + { + i18n::DirectionProperty nType = static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i )); + if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT || + nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC || + nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING || + nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE + ) + isRTL = true; + } + } + + if (isRTL) // If so, reverse string + { + // First, produce mirrored-image for each code point which has the Bidi_Mirrored property. + str = PDFIProcessor::SubstituteBidiMirrored(str); + // Then, reverse the code points in the string, in backward order. 
+ str = ::comphelper::string::reverseCodePoints(str); + } + + m_rEmitContext.rEmitter.beginTag( "text:span", aProps ); + + sal_Unicode strToken; + for (int i = 0; i < elem.Text.getLength(); i++) + { + strToken = str[i]; + if (strToken == strSpace || strToken == strNbSpace) + { + aProps["text:c"] = "1"; + m_rEmitContext.rEmitter.beginTag("text:s", aProps); + m_rEmitContext.rEmitter.endTag("text:s"); + } + else if (strToken == tabSpace) + { + m_rEmitContext.rEmitter.beginTag("text:tab", aProps); + m_rEmitContext.rEmitter.endTag("text:tab"); + } + else + m_rEmitContext.rEmitter.write(OUString(strToken)); + } + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + m_rEmitContext.rEmitter.endTag( "text:span" ); +} + +void WriterXmlEmitter::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + PropertyMap aProps; + if( elem.StyleId != -1 ) + { + aProps[ "text:style-name" ] = m_rEmitContext.rStyles.getStyleName( elem.StyleId ); + } + const char* pTagType = "text:p"; + if( elem.Type == ParagraphElement::Headline ) + pTagType = "text:h"; + m_rEmitContext.rEmitter.beginTag( pTagType, aProps ); + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + m_rEmitContext.rEmitter.endTag( pTagType ); +} + +void WriterXmlEmitter::fillFrameProps( DrawElement& rElem, + PropertyMap& rProps, + const EmitContext& rEmitContext ) +{ + double rel_x = rElem.x, rel_y = rElem.y; + + // find anchor type by recursing though parents + Element* pAnchor = &rElem; + ParagraphElement* pParaElt = nullptr; + PageElement* pPage = nullptr; + while ((pAnchor = pAnchor->Parent)) + { + if ((pParaElt = dynamic_cast<ParagraphElement*>(pAnchor))) + break; + if ((pPage = dynamic_cast<PageElement*>(pAnchor))) + break; + } + if( 
pAnchor ) + { + if (pParaElt) + { + rProps[ "text:anchor-type" ] = rElem.isCharacter + ? std::u16string_view(u"character") : std::u16string_view(u"paragraph"); + } + else + { + assert(pPage); // guaranteed by the while loop above + rProps[ "text:anchor-type" ] = "page"; + rProps[ "text:anchor-page-number" ] = OUString::number(pPage->PageNumber); + } + rel_x -= pAnchor->x; + rel_y -= pAnchor->y; + } + + rProps[ "draw:z-index" ] = OUString::number( rElem.ZOrder ); + rProps[ "draw:style-name"] = rEmitContext.rStyles.getStyleName( rElem.StyleId ); + rProps[ "svg:width" ] = convertPixelToUnitString( rElem.w ); + rProps[ "svg:height" ] = convertPixelToUnitString( rElem.h ); + + const GraphicsContext& rGC = + rEmitContext.rProcessor.getGraphicsContext( rElem.GCId ); + if( rGC.Transformation.isIdentity() ) + { + if( !rElem.isCharacter ) + { + rProps[ "svg:x" ] = convertPixelToUnitString( rel_x ); + rProps[ "svg:y" ] = convertPixelToUnitString( rel_y ); + } + } + else + { + basegfx::B2DTuple aScale, aTranslation; + double fRotate, fShearX; + + rGC.Transformation.decompose( aScale, aTranslation, fRotate, fShearX ); + + OUStringBuffer aBuf( 256 ); + + // TODO(F2): general transformation case missing; if implemented, note + // that ODF rotation is oriented the other way + + // build transformation string + if (rElem.MirrorVertical) + { + // At some point, rElem.h may start arriving positive, + // so use robust adjusting math + rel_y -= std::abs(rElem.h); + if (!aBuf.isEmpty()) + aBuf.append(' '); + aBuf.append("scale( 1.0 -1.0 )"); + } + if( fShearX != 0.0 ) + { + aBuf.append( "skewX( " ); + aBuf.append( fShearX ); + aBuf.append( " )" ); + } + if( fRotate != 0.0 ) + { + if( !aBuf.isEmpty() ) + aBuf.append( ' ' ); + aBuf.append( "rotate( " ); + aBuf.append( -fRotate ); + aBuf.append( " )" ); + + } + if( ! 
rElem.isCharacter ) + { + if( !aBuf.isEmpty() ) + aBuf.append( ' ' ); + aBuf.append( "translate( " ); + aBuf.append( convertPixelToUnitString( rel_x ) ); + aBuf.append( ' ' ); + aBuf.append( convertPixelToUnitString( rel_y ) ); + aBuf.append( " )" ); + } + + rProps[ "draw:transform" ] = aBuf.makeStringAndClear(); + } +} + +void WriterXmlEmitter::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( elem.Children.empty() ) + return; + + bool bTextBox = (dynamic_cast<ParagraphElement*>(elem.Children.front().get()) != nullptr); + PropertyMap aFrameProps; + fillFrameProps( elem, aFrameProps, m_rEmitContext ); + m_rEmitContext.rEmitter.beginTag( "draw:frame", aFrameProps ); + if( bTextBox ) + m_rEmitContext.rEmitter.beginTag( "draw:text-box", PropertyMap() ); + + auto this_it = elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } + + if( bTextBox ) + m_rEmitContext.rEmitter.endTag( "draw:text-box" ); + m_rEmitContext.rEmitter.endTag( "draw:frame" ); +} + +void WriterXmlEmitter::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + elem.updateGeometry(); + /* note: + * aw recommends using 100dth of mm in all respects since the xml import + * (a) is buggy (see issue 37213) + * (b) is optimized for 100dth of mm and does not scale itself then, + * this does not gain us speed but makes for smaller rounding errors since + * the xml importer coordinates are integer based + */ + for (sal_uInt32 i = 0; i< elem.PolyPoly.count(); i++) + { + basegfx::B2DPolygon b2dPolygon = elem.PolyPoly.getB2DPolygon( i ); + + for ( sal_uInt32 j = 0; j< b2dPolygon.count(); j++ ) + { + basegfx::B2DPoint point; + basegfx::B2DPoint nextPoint; + point = b2dPolygon.getB2DPoint( j ); + + basegfx::B2DPoint prevPoint = b2dPolygon.getPrevControlPoint( j ) ; + + point.setX( convPx2mmPrec2( point.getX() )*100.0 ); + 
point.setY( convPx2mmPrec2( point.getY() )*100.0 ); + + if ( b2dPolygon.isPrevControlPointUsed( j ) ) + { + prevPoint.setX( convPx2mmPrec2( prevPoint.getX() )*100.0 ); + prevPoint.setY( convPx2mmPrec2( prevPoint.getY() )*100.0 ); + } + + if ( b2dPolygon.isNextControlPointUsed( j ) ) + { + nextPoint = b2dPolygon.getNextControlPoint( j ) ; + nextPoint.setX( convPx2mmPrec2( nextPoint.getX() )*100.0 ); + nextPoint.setY( convPx2mmPrec2( nextPoint.getY() )*100.0 ); + } + + b2dPolygon.setB2DPoint( j, point ); + + if ( b2dPolygon.isPrevControlPointUsed( j ) ) + b2dPolygon.setPrevControlPoint( j , prevPoint ) ; + + if ( b2dPolygon.isNextControlPointUsed( j ) ) + b2dPolygon.setNextControlPoint( j , nextPoint ) ; + } + + elem.PolyPoly.setB2DPolygon( i, b2dPolygon ); + } + + PropertyMap aProps; + fillFrameProps( elem, aProps, m_rEmitContext ); + OUStringBuffer aBuf( 64 ); + aBuf.append( "0 0 " ); + aBuf.append( convPx2mmPrec2(elem.w)*100.0 ); + aBuf.append( ' ' ); + aBuf.append( convPx2mmPrec2(elem.h)*100.0 ); + aProps[ "svg:viewBox" ] = aBuf.makeStringAndClear(); + aProps[ "svg:d" ] = basegfx::utils::exportToSvgD( elem.PolyPoly, true, true, false ); + + m_rEmitContext.rEmitter.beginTag( "draw:path", aProps ); + m_rEmitContext.rEmitter.endTag( "draw:path" ); +} + +void WriterXmlEmitter::visit( ImageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + PropertyMap aImageProps; + m_rEmitContext.rEmitter.beginTag( "draw:image", aImageProps ); + m_rEmitContext.rEmitter.beginTag( "office:binary-data", PropertyMap() ); + m_rEmitContext.rImages.writeBase64EncodedStream( elem.Image, m_rEmitContext); + m_rEmitContext.rEmitter.endTag( "office:binary-data" ); + m_rEmitContext.rEmitter.endTag( "draw:image" ); +} + +void WriterXmlEmitter::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( m_rEmitContext.xStatusIndicator.is() ) + m_rEmitContext.xStatusIndicator->setValue( elem.PageNumber ); + + auto this_it = 
elem.Children.begin(); + while( this_it != elem.Children.end() && this_it->get() != &elem ) + { + (*this_it)->visitedBy( *this, this_it ); + ++this_it; + } +} + +void WriterXmlEmitter::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ + m_rEmitContext.rEmitter.beginTag( "office:body", PropertyMap() ); + m_rEmitContext.rEmitter.beginTag( "office:text", PropertyMap() ); + + for( const auto& rxChild : elem.Children ) + { + PageElement* pPage = dynamic_cast<PageElement*>(rxChild.get()); + if( pPage ) + { + // emit only page anchored objects + // currently these are only DrawElement types + for( auto child_it = pPage->Children.begin(); child_it != pPage->Children.end(); ++child_it ) + { + if( dynamic_cast<DrawElement*>(child_it->get()) != nullptr ) + (*child_it)->visitedBy( *this, child_it ); + } + } + } + + // do not emit page anchored objects, they are emitted before + // (must precede all pages in writer document) currently these are + // only DrawElement types + for( auto it = elem.Children.begin(); it != elem.Children.end(); ++it ) + { + if( dynamic_cast<DrawElement*>(it->get()) != nullptr ) + (*it)->visitedBy( *this, it ); + } + + m_rEmitContext.rEmitter.endTag( "office:text" ); + m_rEmitContext.rEmitter.endTag( "office:body" ); +} + + +void WriterXmlOptimizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void WriterXmlOptimizer::visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ +} + +void WriterXmlOptimizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + elem.applyToChildren(*this); +} + +void WriterXmlOptimizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void WriterXmlOptimizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& elemIt ) +{ + /* note: optimize two consecutive PolyPolyElements 
that + * have the same path but one of which is a stroke while + * the other is a fill + */ + if( !elem.Parent ) + return; + // find following PolyPolyElement in parent's children list + if( elemIt == elem.Parent->Children.end() ) + return; + auto next_it = elemIt; + ++next_it; + if( next_it == elem.Parent->Children.end() ) + return; + + PolyPolyElement* pNext = dynamic_cast<PolyPolyElement*>(next_it->get()); + if( !pNext || pNext->PolyPoly != elem.PolyPoly ) + return; + + const GraphicsContext& rNextGC = + m_rProcessor.getGraphicsContext( pNext->GCId ); + const GraphicsContext& rThisGC = + m_rProcessor.getGraphicsContext( elem.GCId ); + + if( !(rThisGC.BlendMode == rNextGC.BlendMode && + rThisGC.Flatness == rNextGC.Flatness && + rThisGC.Transformation == rNextGC.Transformation && + rThisGC.Clip == rNextGC.Clip && + pNext->Action == PATH_STROKE && + (elem.Action == PATH_FILL || elem.Action == PATH_EOFILL)) ) + return; + + GraphicsContext aGC = rThisGC; + aGC.LineJoin = rNextGC.LineJoin; + aGC.LineCap = rNextGC.LineCap; + aGC.LineWidth = rNextGC.LineWidth; + aGC.MiterLimit= rNextGC.MiterLimit; + aGC.DashArray = rNextGC.DashArray; + aGC.LineColor = rNextGC.LineColor; + elem.GCId = m_rProcessor.getGCId( aGC ); + + elem.Action |= pNext->Action; + + elem.Children.splice( elem.Children.end(), pNext->Children ); + elem.Parent->Children.erase(next_it); +} + +void WriterXmlOptimizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt) +{ + optimizeTextElements( elem ); + + elem.applyToChildren(*this); + + if( !(elem.Parent && rParentIt != elem.Parent->Children.end()) ) + return; + + // find if there is a previous paragraph that might be a heading for this one + auto prev = rParentIt; + ParagraphElement* pPrevPara = nullptr; + while( prev != elem.Parent->Children.begin() ) + { + --prev; + pPrevPara = dynamic_cast< ParagraphElement* >(prev->get()); + if( pPrevPara ) + { + /* What constitutes a heading ? 
current hints are: + * - one line only + * - not too far away from this paragraph (two heading height max ?) + * - font larger or bold + * this is of course incomplete + * FIXME: improve hints for heading + */ + // check for single line + if( pPrevPara->isSingleLined( m_rProcessor ) ) + { + double head_line_height = pPrevPara->getLineHeight( m_rProcessor ); + if( pPrevPara->y + pPrevPara->h + 2*head_line_height > elem.y ) + { + // check for larger font + if( head_line_height > elem.getLineHeight( m_rProcessor ) ) + { + pPrevPara->Type = ParagraphElement::Headline; + } + else + { + // check whether text of pPrevPara is bold (at least first text element) + // and this para is not bold (ditto) + TextElement* pPrevText = pPrevPara->getFirstTextChild(); + TextElement* pThisText = elem.getFirstTextChild(); + if( pPrevText && pThisText ) + { + const FontAttributes& rPrevFont = m_rProcessor.getFont( pPrevText->FontId ); + const FontAttributes& rThisFont = m_rProcessor.getFont( pThisText->FontId ); + if ( (rPrevFont.fontWeight == u"600" || + rPrevFont.fontWeight == u"bold" || + rPrevFont.fontWeight == u"800" || + rPrevFont.fontWeight == u"900" ) && + (rThisFont.fontWeight == u"600" || + rThisFont.fontWeight == u"bold" || + rThisFont.fontWeight == u"800" || + rThisFont.fontWeight == u"900" ) ) + { + pPrevPara->Type = ParagraphElement::Headline; + } + } + } + } + } + break; + } + } +} + +void WriterXmlOptimizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( m_rProcessor.getStatusIndicator().is() ) + m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber ); + + // resolve hyperlinks + elem.resolveHyperlinks(); + + elem.resolveFontStyles( m_rProcessor ); // underlines and such + + // FIXME: until hyperlinks and font effects are adjusted for + // geometrical search handle them before sorting + PDFIProcessor::sortElements( &elem ); + + // find paragraphs in text + ParagraphElement* pCurPara = nullptr; + std::list< 
std::unique_ptr<Element> >::iterator page_element, next_page_element; + next_page_element = elem.Children.begin(); + double fCurLineHeight = 0.0; // average height of text items in current para + int nCurLineElements = 0; // number of line contributing elements in current para + double line_left = elem.w, line_right = 0.0; + double column_width = elem.w*0.75; // estimate text width + // TODO: guess columns + while( next_page_element != elem.Children.end() ) + { + page_element = next_page_element++; + ParagraphElement* pPagePara = dynamic_cast<ParagraphElement*>(page_element->get()); + if( pPagePara ) + { + pCurPara = pPagePara; + // adjust line height and text items + fCurLineHeight = 0.0; + nCurLineElements = 0; + for( const auto& rxChild : pCurPara->Children ) + { + TextElement* pTestText = dynamic_cast<TextElement*>(rxChild.get()); + if( pTestText ) + { + fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pTestText->h)/double(nCurLineElements+1); + nCurLineElements++; + } + } + continue; + } + + HyperlinkElement* pLink = dynamic_cast<HyperlinkElement*>(page_element->get()); + DrawElement* pDraw = dynamic_cast<DrawElement*>(page_element->get()); + if( ! pDraw && pLink && ! 
pLink->Children.empty() ) + pDraw = dynamic_cast<DrawElement*>(pLink->Children.front().get() ); + if( pDraw ) + { + // insert small drawing objects as character, else leave them page bound + + bool bInsertToParagraph = false; + // first check if this is either inside the paragraph + if( pCurPara && pDraw->y < pCurPara->y + pCurPara->h ) + { + if( pDraw->h < fCurLineHeight * 1.5 ) + { + bInsertToParagraph = true; + fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pDraw->h)/double(nCurLineElements+1); + nCurLineElements++; + // mark draw element as character + pDraw->isCharacter = true; + } + } + // or perhaps the draw element begins a new paragraph + else if( next_page_element != elem.Children.end() ) + { + TextElement* pText = dynamic_cast<TextElement*>(next_page_element->get()); + if( ! pText ) + { + ParagraphElement* pPara = dynamic_cast<ParagraphElement*>(next_page_element->get()); + if( pPara && ! pPara->Children.empty() ) + pText = dynamic_cast<TextElement*>(pPara->Children.front().get()); + } + if( pText && // check there is a text + pDraw->h < pText->h*1.5 && // and it is approx the same height + // and either upper or lower edge of pDraw is inside text's vertical range + ( ( pDraw->y >= pText->y && pDraw->y <= pText->y+pText->h ) || + ( pDraw->y+pDraw->h >= pText->y && pDraw->y+pDraw->h <= pText->y+pText->h ) + ) + ) + { + bInsertToParagraph = true; + fCurLineHeight = pDraw->h; + nCurLineElements = 1; + line_left = pDraw->x; + line_right = pDraw->x + pDraw->w; + // begin a new paragraph + pCurPara = nullptr; + // mark draw element as character + pDraw->isCharacter = true; + } + } + + if( ! bInsertToParagraph ) + { + pCurPara = nullptr; + continue; + } + } + + TextElement* pText = dynamic_cast<TextElement*>(page_element->get()); + if( ! pText && pLink && ! pLink->Children.empty() ) + pText = dynamic_cast<TextElement*>(pLink->Children.front().get()); + if( pText ) + { + Element* pGeo = pLink ? 
static_cast<Element*>(pLink) : + static_cast<Element*>(pText); + if( pCurPara ) + { + // there was already a text element, check for a new paragraph + if( nCurLineElements > 0 ) + { + // if the new text is significantly distant from the paragraph + // begin a new paragraph + if( pGeo->y > pCurPara->y+pCurPara->h + fCurLineHeight*0.5 ) + pCurPara = nullptr; // insert new paragraph + else if( pGeo->y > (pCurPara->y+pCurPara->h - fCurLineHeight*0.05) ) + { + // new paragraph if either the last line of the paragraph + // was significantly shorter than the paragraph as a whole + if( (line_right - line_left) < pCurPara->w*0.75 ) + pCurPara = nullptr; + // or the last line was significantly smaller than the column width + else if( (line_right - line_left) < column_width*0.75 ) + pCurPara = nullptr; + } + } + } + // update line height/width + if( pCurPara ) + { + fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pGeo->h)/double(nCurLineElements+1); + nCurLineElements++; + if( pGeo->x < line_left ) + line_left = pGeo->x; + if( pGeo->x+pGeo->w > line_right ) + line_right = pGeo->x+pGeo->w; + } + else + { + fCurLineHeight = pGeo->h; + nCurLineElements = 1; + line_left = pGeo->x; + line_right = pGeo->x + pGeo->w; + } + } + + // move element to current paragraph + if( ! 
pCurPara ) // new paragraph, insert one + { + pCurPara = ElementFactory::createParagraphElement( nullptr ); + // set parent + pCurPara->Parent = &elem; + //insert new paragraph before current element + page_element = elem.Children.insert( page_element, std::unique_ptr<Element>(pCurPara) ); + // forward iterator to current element again + ++ page_element; + // update next_element which is now invalid + next_page_element = page_element; + ++ next_page_element; + } + Element* pCurEle = page_element->get(); + Element::setParent( page_element, pCurPara ); + OSL_ENSURE( !pText || pCurEle == pText || pCurEle == pLink, "paragraph child list in disorder" ); + if( pText || pDraw ) + pCurPara->updateGeometryWith( pCurEle ); + } + + // process children + elem.applyToChildren(*this); + + // find possible header and footer + checkHeaderAndFooter( elem ); +} + +void WriterXmlOptimizer::checkHeaderAndFooter( PageElement& rElem ) +{ + /* indicators for a header: + * - single line paragraph at top of page (inside 15% page height) + * - at least lineheight above the next paragraph + * + * indicators for a footer likewise: + * - single line paragraph at bottom of page (inside 15% page height) + * - at least lineheight below the previous paragraph + */ + + auto isParagraphElement = [](std::unique_ptr<Element>& rxChild) -> bool { + return dynamic_cast<ParagraphElement*>(rxChild.get()) != nullptr; + }; + + // detect header + // Note: the following assumes that the pages' children have been + // sorted geometrically + auto it = std::find_if(rElem.Children.begin(), rElem.Children.end(), isParagraphElement); + if (it != rElem.Children.end()) + { + ParagraphElement& rPara = dynamic_cast<ParagraphElement&>(**it); + if( rPara.y+rPara.h < rElem.h*0.15 && rPara.isSingleLined( m_rProcessor ) ) + { + auto next_it = it; + ParagraphElement* pNextPara = nullptr; + while( ++next_it != rElem.Children.end() && pNextPara == nullptr ) + { + pNextPara = dynamic_cast<ParagraphElement*>(next_it->get()); + } 
+ if( pNextPara && pNextPara->y > rPara.y+rPara.h*2 ) + { + rElem.HeaderElement = std::move(*it); + rPara.Parent = nullptr; + rElem.Children.erase( it ); + } + } + } + + // detect footer + auto rit = std::find_if(rElem.Children.rbegin(), rElem.Children.rend(), isParagraphElement); + if (rit == rElem.Children.rend()) + return; + + ParagraphElement& rPara = dynamic_cast<ParagraphElement&>(**rit); + if( !(rPara.y > rElem.h*0.85 && rPara.isSingleLined( m_rProcessor )) ) + return; + + std::list< std::unique_ptr<Element> >::reverse_iterator next_it = rit; + ParagraphElement* pNextPara = nullptr; + while( ++next_it != rElem.Children.rend() && pNextPara == nullptr ) + { + pNextPara = dynamic_cast<ParagraphElement*>(next_it->get()); + } + if( pNextPara && pNextPara->y < rPara.y-rPara.h*2 ) + { + rElem.FooterElement = std::move(*rit); + rPara.Parent = nullptr; + rElem.Children.erase( std::next(rit).base() ); + } +} + +void WriterXmlOptimizer::optimizeTextElements(Element& rParent) +{ + if( rParent.Children.empty() ) // this should not happen + { + OSL_FAIL( "empty paragraph optimized" ); + return; + } + + // concatenate child elements with same font id + auto next = rParent.Children.begin(); + auto it = next++; + FrameElement* pFrame = dynamic_cast<FrameElement*>(rParent.Parent); + bool bRotatedFrame = false; + if( pFrame ) + { + const GraphicsContext& rFrameGC = m_rProcessor.getGraphicsContext( pFrame->GCId ); + if( rFrameGC.isRotatedOrSkewed() ) + bRotatedFrame = true; + } + while( next != rParent.Children.end() ) + { + bool bConcat = false; + TextElement* pCur = dynamic_cast<TextElement*>(it->get()); + if( pCur ) + { + TextElement* pNext = dynamic_cast<TextElement*>(next->get()); + if( pNext ) + { + const GraphicsContext& rCurGC = m_rProcessor.getGraphicsContext( pCur->GCId ); + const GraphicsContext& rNextGC = m_rProcessor.getGraphicsContext( pNext->GCId ); + + // line and space optimization; works only in strictly horizontal mode + + if( !bRotatedFrame + && ! 
rCurGC.isRotatedOrSkewed() + && ! rNextGC.isRotatedOrSkewed() + && ! pNext->Text.isEmpty() + && pNext->Text[0] != ' ' + && ! pCur->Text.isEmpty() + && pCur->Text[pCur->Text.getLength() - 1] != ' ' + ) + { + // check for new line in paragraph + if( pNext->y > pCur->y+pCur->h ) + { + // new line begins + // check whether a space would should be inserted or a hyphen removed + sal_Unicode aLastCode = pCur->Text[pCur->Text.getLength() - 1]; + if( aLastCode == '-' + || aLastCode == 0x2010 + || (aLastCode >= 0x2012 && aLastCode <= 0x2015) + || aLastCode == 0xff0d + ) + { + // cut a hyphen + pCur->Text.setLength( pCur->Text.getLength()-1 ); + } + // append a space unless there is a non breaking hyphen + else if( aLastCode != 0x2011 ) + { + pCur->Text.append( ' ' ); + } + } + else // we're continuing the same line + { + // check whether a space would should be inserted + // check for a small horizontal offset + if( pCur->x + pCur->w + pNext->h*0.15 < pNext->x ) + { + pCur->Text.append( ' ' ); + } + } + } + // concatenate consecutive text elements unless there is a + // font or text color change, leave a new span in that case + if( pCur->FontId == pNext->FontId && + rCurGC.FillColor.Red == rNextGC.FillColor.Red && + rCurGC.FillColor.Green == rNextGC.FillColor.Green && + rCurGC.FillColor.Blue == rNextGC.FillColor.Blue && + rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha + ) + { + pCur->updateGeometryWith( pNext ); + // append text to current element + pCur->Text.append( pNext->Text ); + // append eventual children to current element + // and clear children (else the children just + // appended to pCur would be destroyed) + pCur->Children.splice( pCur->Children.end(), pNext->Children ); + // get rid of the now useless element + rParent.Children.erase( next ); + bConcat = true; + } + } + } + else if( dynamic_cast<HyperlinkElement*>(it->get()) ) + optimizeTextElements( **it ); + if( bConcat ) + { + next = it; + ++next; + } + else + { + ++it; + ++next; + } + } +} + +void 
WriterXmlOptimizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ + elem.applyToChildren(*this); +} + + +void WriterXmlFinalizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + // xxx TODO copied from DrawElement + const GraphicsContext& rGC = m_rProcessor.getGraphicsContext(elem.GCId ); + PropertyMap aProps; + aProps[ "style:family" ] = "graphic"; + + PropertyMap aGCProps; + if (elem.Action & PATH_STROKE) + { + double scale = GetAverageTransformationScale(rGC.Transformation); + if (rGC.DashArray.size() < 2) + { + aGCProps[ "draw:stroke" ] = "solid"; + } + else + { + PropertyMap props; + FillDashStyleProps(props, rGC.DashArray, scale); + StyleContainer::Style style("draw:stroke-dash", std::move(props)); + + aGCProps[ "draw:stroke" ] = "dash"; + aGCProps[ "draw:stroke-dash" ] = + m_rStyleContainer.getStyleName( + m_rStyleContainer.getStyleId(style)); + } + + aGCProps[ "svg:stroke-color" ] = getColorString(rGC.LineColor); + aGCProps[ "svg:stroke-width" ] = convertPixelToUnitString(rGC.LineWidth * scale); + aGCProps[ "draw:stroke-linejoin" ] = rGC.GetLineJoinString(); + aGCProps[ "svg:stroke-linecap" ] = rGC.GetLineCapString(); + } + else + { + aGCProps[ "draw:stroke" ] = "none"; + } + + // TODO(F1): check whether stuff could be emulated by gradient/bitmap/hatch + if( elem.Action & (PATH_FILL | PATH_EOFILL) ) + { + aGCProps[ "draw:fill" ] = "solid"; + aGCProps[ "draw:fill-color" ] = getColorString( rGC.FillColor ); + } + else + { + aGCProps[ "draw:fill" ] = "none"; + } + + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:graphic-properties", std::move(aGCProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); +} + +void WriterXmlFinalizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void 
WriterXmlFinalizer::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + const FontAttributes& rFont = m_rProcessor.getFont( elem.FontId ); + PropertyMap aProps; + aProps[ "style:family" ] = "text"; + + PropertyMap aFontProps; + + // family name + // TODO: tdf#143095: use system font name rather than PSName + SAL_INFO("sdext.pdfimport", "The font used in xml is: " << rFont.familyName); + aFontProps[ "fo:font-family" ] = rFont.familyName; + aFontProps[ "style:font-family-asian" ] = rFont.familyName; + aFontProps[ "style:font-family-complex" ] = rFont.familyName; + + // bold + aFontProps[ "fo:font-weight" ] = rFont.fontWeight; + aFontProps[ "style:font-weight-asian" ] = rFont.fontWeight; + aFontProps[ "style:font-weight-complex" ] = rFont.fontWeight; + + // italic + if( rFont.isItalic ) + { + aFontProps[ "fo:font-style" ] = "italic"; + aFontProps[ "style:font-style-asian" ] = "italic"; + aFontProps[ "style:font-style-complex" ] = "italic"; + } + + // underline + if( rFont.isUnderline ) + { + aFontProps[ "style:text-underline-style" ] = "solid"; + aFontProps[ "style:text-underline-width" ] = "auto"; + aFontProps[ "style:text-underline-color" ] = "font-color"; + } + + // outline + if( rFont.isOutline ) + aFontProps[ "style:text-outline" ] = "true"; + + // size + OUString aFSize = OUString::number( rFont.size*72/PDFI_OUTDEV_RESOLUTION ) + "pt"; + aFontProps[ "fo:font-size" ] = aFSize; + aFontProps[ "style:font-size-asian" ] = aFSize; + aFontProps[ "style:font-size-complex" ] = aFSize; + + // color + const GraphicsContext& rGC = m_rProcessor.getGraphicsContext( elem.GCId ); + aFontProps[ "fo:color" ] = getColorString( rFont.isOutline ? 
rGC.LineColor : rGC.FillColor ); + + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:text-properties", std::move(aFontProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); +} + +void WriterXmlFinalizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt ) +{ + PropertyMap aParaProps; + + if( elem.Parent ) + { + // check for center alignment + // criterion: paragraph is small relative to parent and distributed around its center + double p_x = elem.Parent->x; + double p_w = elem.Parent->w; + + PageElement* pPage = dynamic_cast<PageElement*>(elem.Parent); + if( pPage ) + { + p_x += pPage->LeftMargin; + p_w -= pPage->LeftMargin+pPage->RightMargin; + } + bool bIsCenter = false; + if( elem.w < ( p_w/2) ) + { + double delta = elem.w/4; + // allow very small paragraphs to deviate a little more + // relative to parent's center + if( elem.w < p_w/8 ) + delta = elem.w; + if( fabs( elem.x+elem.w/2 - ( p_x+ p_w/2) ) < delta || + (pPage && fabs( elem.x+elem.w/2 - (pPage->x + pPage->w/2) ) < delta) ) + { + bIsCenter = true; + aParaProps[ "fo:text-align" ] = "center"; + } + } + if( ! bIsCenter && elem.x > p_x + p_w/10 ) + { + // indent + OUStringBuffer aBuf( 32 ); + aBuf.append( convPx2mm( elem.x - p_x ) ); + aBuf.append( "mm" ); + aParaProps[ "fo:margin-left" ] = aBuf.makeStringAndClear(); + } + + // check whether to leave some space to next paragraph + // find whether there is a next paragraph + auto it = rParentIt; + const ParagraphElement* pNextPara = nullptr; + while( ++it != elem.Parent->Children.end() && ! 
pNextPara ) + pNextPara = dynamic_cast< const ParagraphElement* >(it->get()); + if( pNextPara ) + { + if( pNextPara->y - (elem.y+elem.h) > convmm2Px( 10 ) ) + { + OUStringBuffer aBuf( 32 ); + aBuf.append( convPx2mm( pNextPara->y - (elem.y+elem.h) ) ); + aBuf.append( "mm" ); + aParaProps[ "fo:margin-bottom" ] = aBuf.makeStringAndClear(); + } + } + } + + if( ! aParaProps.empty() ) + { + PropertyMap aProps; + aProps[ "style:family" ] = "paragraph"; + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:paragraph-properties", std::move(aParaProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); + } + + elem.applyToChildren(*this); +} + +void WriterXmlFinalizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&) +{ + PropertyMap aProps; + aProps[ "style:family" ] = "graphic"; + + PropertyMap aGCProps; + + aGCProps[ "draw:stroke" ] = "none"; + aGCProps[ "draw:fill" ] = "none"; + aGCProps[ "draw:auto-grow-height" ] = "true"; + aGCProps[ "draw:auto-grow-width" ] = "true"; + aGCProps[ "draw:textarea-horizontal-align" ] = "left"; + aGCProps[ "draw:textarea-vertical-align" ] = "top"; + aGCProps[ "fo:min-height"] = "0cm"; + aGCProps[ "fo:min-width"] = "0cm"; + aGCProps[ "fo:padding-top" ] = "0cm"; + aGCProps[ "fo:padding-left" ] = "0cm"; + aGCProps[ "fo:padding-right" ] = "0cm"; + aGCProps[ "fo:padding-bottom" ] = "0cm"; + + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + StyleContainer::Style aSubStyle( "style:graphic-properties", std::move(aGCProps) ); + aStyle.SubStyles.push_back( &aSubStyle ); + + elem.StyleId = m_rStyleContainer.getStyleId( aStyle ); + elem.applyToChildren(*this); +} + +void WriterXmlFinalizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ +} + +void WriterXmlFinalizer::setFirstOnPage( ParagraphElement& rElem, + StyleContainer& rStyles, + const 
OUString& rMasterPageName ) +{ + PropertyMap aProps; + if( rElem.StyleId != -1 ) + { + const PropertyMap* pProps = rStyles.getProperties( rElem.StyleId ); + if( pProps ) + aProps = *pProps; + } + + aProps[ "style:family" ] = "paragraph"; + aProps[ "style:master-page-name" ] = rMasterPageName; + + if( rElem.StyleId != -1 ) + rElem.StyleId = rStyles.setProperties( rElem.StyleId, std::move(aProps) ); + else + { + StyleContainer::Style aStyle( "style:style", std::move(aProps) ); + rElem.StyleId = rStyles.getStyleId( aStyle ); + } +} + +void WriterXmlFinalizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + if( m_rProcessor.getStatusIndicator().is() ) + m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber ); + + // transform from pixel to mm + double page_width = convPx2mm( elem.w ), page_height = convPx2mm( elem.h ); + + // calculate page margins out of the relevant children (paragraphs) + elem.TopMargin = elem.h; + elem.BottomMargin = 0; + elem.LeftMargin = elem.w; + elem.RightMargin = 0; + // first element should be a paragraph + ParagraphElement* pFirstPara = nullptr; + for( const auto& rxChild : elem.Children ) + { + if( dynamic_cast<ParagraphElement*>( rxChild.get() ) ) + { + if( rxChild->x < elem.LeftMargin ) + elem.LeftMargin = rxChild->x; + if( rxChild->y < elem.TopMargin ) + elem.TopMargin = rxChild->y; + if( rxChild->x + rxChild->w > elem.w - elem.RightMargin ) + elem.RightMargin = elem.w - (rxChild->x + rxChild->w); + if( rxChild->y + rxChild->h > elem.h - elem.BottomMargin ) + elem.BottomMargin = elem.h - (rxChild->y + rxChild->h); + if( ! 
pFirstPara ) + pFirstPara = dynamic_cast<ParagraphElement*>( rxChild.get() ); + } + } + if( elem.HeaderElement && elem.HeaderElement->y < elem.TopMargin ) + elem.TopMargin = elem.HeaderElement->y; + if( elem.FooterElement && elem.FooterElement->y+elem.FooterElement->h > elem.h - elem.BottomMargin ) + elem.BottomMargin = elem.h - (elem.FooterElement->y + elem.FooterElement->h); + + // transform margins to mm + double left_margin = convPx2mm( elem.LeftMargin ); + double right_margin = convPx2mm( elem.RightMargin ); + double top_margin = convPx2mm( elem.TopMargin ); + double bottom_margin = convPx2mm( elem.BottomMargin ); + if( ! pFirstPara ) + { + // use default page margins + left_margin = 10; + right_margin = 10; + top_margin = 10; + bottom_margin = 10; + } + + // round left/top margin to nearest mm + left_margin = rtl_math_round( left_margin, 0, rtl_math_RoundingMode_Floor ); + top_margin = rtl_math_round( top_margin, 0, rtl_math_RoundingMode_Floor ); + // round (fuzzy) right/bottom margin to nearest cm + right_margin = rtl_math_round( right_margin, right_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor ); + bottom_margin = rtl_math_round( bottom_margin, bottom_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor ); + + // set reasonable default in case of way too large margins + // e.g. 
no paragraph case + if( left_margin > page_width/2.0 - 10 ) + left_margin = 10; + if( right_margin > page_width/2.0 - 10 ) + right_margin = 10; + if( top_margin > page_height/2.0 - 10 ) + top_margin = 10; + if( bottom_margin > page_height/2.0 - 10 ) + bottom_margin = 10; + + // catch the weird cases + if( left_margin < 0 ) + left_margin = 0; + if( right_margin < 0 ) + right_margin = 0; + if( top_margin < 0 ) + top_margin = 0; + if( bottom_margin < 0 ) + bottom_margin = 0; + + // widely differing margins are unlikely to be correct + if( right_margin > left_margin*1.5 ) + right_margin = left_margin; + + elem.LeftMargin = convmm2Px( left_margin ); + elem.RightMargin = convmm2Px( right_margin ); + elem.TopMargin = convmm2Px( top_margin ); + elem.BottomMargin = convmm2Px( bottom_margin ); + + // get styles for paragraphs + PropertyMap aPageProps; + PropertyMap aPageLayoutProps; + aPageLayoutProps[ "fo:page-width" ] = unitMMString( page_width ); + aPageLayoutProps[ "fo:page-height" ] = unitMMString( page_height ); + aPageLayoutProps[ "style:print-orientation" ] + = elem.w < elem.h ? 
std::u16string_view(u"portrait") : std::u16string_view(u"landscape"); + aPageLayoutProps[ "fo:margin-top" ] = unitMMString( top_margin ); + aPageLayoutProps[ "fo:margin-bottom" ] = unitMMString( bottom_margin ); + aPageLayoutProps[ "fo:margin-left" ] = unitMMString( left_margin ); + aPageLayoutProps[ "fo:margin-right" ] = unitMMString( right_margin ); + aPageLayoutProps[ "style:writing-mode" ]= "lr-tb"; + + StyleContainer::Style aStyle( "style:page-layout", std::move(aPageProps)); + StyleContainer::Style aSubStyle( "style:page-layout-properties", std::move(aPageLayoutProps)); + aStyle.SubStyles.push_back(&aSubStyle); + sal_Int32 nPageStyle = m_rStyleContainer.impl_getStyleId( aStyle, false ); + + // create master page + OUString aMasterPageLayoutName = m_rStyleContainer.getStyleName( nPageStyle ); + aPageProps[ "style:page-layout-name" ] = aMasterPageLayoutName; + StyleContainer::Style aMPStyle( "style:master-page", std::move(aPageProps) ); + StyleContainer::Style aHeaderStyle( "style:header", PropertyMap() ); + StyleContainer::Style aFooterStyle( "style:footer", PropertyMap() ); + if( elem.HeaderElement ) + { + elem.HeaderElement->visitedBy( *this, std::list<std::unique_ptr<Element>>::iterator() ); + aHeaderStyle.ContainedElement = elem.HeaderElement.get(); + aMPStyle.SubStyles.push_back( &aHeaderStyle ); + } + if( elem.FooterElement ) + { + elem.FooterElement->visitedBy( *this, std::list<std::unique_ptr<Element>>::iterator() ); + aFooterStyle.ContainedElement = elem.FooterElement.get(); + aMPStyle.SubStyles.push_back( &aFooterStyle ); + } + elem.StyleId = m_rStyleContainer.impl_getStyleId( aMPStyle,false ); + + + OUString aMasterPageName = m_rStyleContainer.getStyleName( elem.StyleId ); + + // create styles for children + elem.applyToChildren(*this); + + // no paragraph or other elements before the first paragraph + if( ! 
pFirstPara ) + { + pFirstPara = ElementFactory::createParagraphElement( nullptr ); + pFirstPara->Parent = &elem; + elem.Children.push_front( std::unique_ptr<Element>(pFirstPara) ); + } + setFirstOnPage(*pFirstPara, m_rStyleContainer, aMasterPageName); +} + +void WriterXmlFinalizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& ) +{ + elem.applyToChildren(*this); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx new file mode 100644 index 000000000..e473c2737 --- /dev/null +++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx @@ -0,0 +1,110 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_WRITERTREEVISITING_HXX +#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_WRITERTREEVISITING_HXX + +#include <treevisiting.hxx> + +#include <pdfihelper.hxx> + +#include <com/sun/star/i18n/XCharacterClassification.hpp> + +namespace pdfi +{ + struct DrawElement; + + class WriterXmlOptimizer : public ElementTreeVisitor + { + private: + PDFIProcessor& m_rProcessor; + void optimizeTextElements(Element& rParent); + void checkHeaderAndFooter( PageElement& rElem ); + + public: + explicit WriterXmlOptimizer(PDFIProcessor& rProcessor) : + m_rProcessor(rProcessor) + {} + + virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + }; + + class WriterXmlFinalizer : public ElementTreeVisitor + { + private: + StyleContainer& m_rStyleContainer; + PDFIProcessor& m_rProcessor; + + static void setFirstOnPage( ParagraphElement& rElem, + StyleContainer& rStyles, + const OUString& rMasterPageName ); + + public: + explicit WriterXmlFinalizer(StyleContainer& rStyleContainer, + PDFIProcessor& rProcessor) : + m_rStyleContainer(rStyleContainer), + m_rProcessor(rProcessor) + {} + + virtual void visit( HyperlinkElement&, const 
std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + }; + + class WriterXmlEmitter : public ElementTreeVisitor + { + private: + css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass; + EmitContext& m_rEmitContext ; + static void fillFrameProps( DrawElement& rElem, + PropertyMap& rProps, + const EmitContext& rEmitContext ); + + public: + const css::uno::Reference<css::i18n::XCharacterClassification >& GetCharacterClassification(); + explicit WriterXmlEmitter(EmitContext& rEmitContext) : + m_rEmitContext(rEmitContext) + {} + + virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) 
override; + virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override; + }; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |