summaryrefslogtreecommitdiffstats
path: root/sdext/source/pdfimport/tree
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
commit 267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sdext/source/pdfimport/tree
parent Initial commit. (diff)
download libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
Adding upstream version 4:24.2.0. upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sdext/source/pdfimport/tree')
-rw-r--r-- sdext/source/pdfimport/tree/drawtreevisiting.cxx 1096
-rw-r--r-- sdext/source/pdfimport/tree/drawtreevisiting.hxx 115
-rw-r--r-- sdext/source/pdfimport/tree/genericelements.cxx 449
-rw-r--r-- sdext/source/pdfimport/tree/imagecontainer.cxx 146
-rw-r--r-- sdext/source/pdfimport/tree/pdfiprocessor.cxx 717
-rw-r--r-- sdext/source/pdfimport/tree/style.cxx 246
-rw-r--r-- sdext/source/pdfimport/tree/style.hxx 166
-rw-r--r-- sdext/source/pdfimport/tree/treevisitorfactory.cxx 111
-rw-r--r-- sdext/source/pdfimport/tree/writertreevisiting.cxx 1347
-rw-r--r-- sdext/source/pdfimport/tree/writertreevisiting.hxx 113
10 files changed, 4506 insertions, 0 deletions
diff --git a/sdext/source/pdfimport/tree/drawtreevisiting.cxx b/sdext/source/pdfimport/tree/drawtreevisiting.cxx
new file mode 100644
index 0000000000..5aae544158
--- /dev/null
+++ b/sdext/source/pdfimport/tree/drawtreevisiting.cxx
@@ -0,0 +1,1096 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#include <sal/log.hxx>
+#include <pdfiprocessor.hxx>
+#include <xmlemitter.hxx>
+#include <pdfihelper.hxx>
+#include <imagecontainer.hxx>
+#include "style.hxx"
+#include "drawtreevisiting.hxx"
+#include <genericelements.hxx>
+
+#include <basegfx/polygon/b2dpolypolygontools.hxx>
+#include <osl/diagnose.h>
+#include <rtl/math.hxx>
+#include <com/sun/star/i18n/BreakIterator.hpp>
+#include <com/sun/star/i18n/CharacterClassification.hpp>
+#include <com/sun/star/i18n/ScriptType.hpp>
+#include <com/sun/star/i18n/DirectionProperty.hpp>
+#include <comphelper/string.hxx>
+
+#include <string.h>
+#include <string_view>
+
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::uno;
+
+namespace pdfi
+{
+
+// Returns the cached break iterator, creating it from the processor's
+// component context the first time it is requested.
+const Reference< XBreakIterator >& DrawXmlOptimizer::GetBreakIterator()
+{
+    if ( mxBreakIter.is() )
+        return mxBreakIter;
+
+    Reference< XComponentContext > xCtx( m_rProcessor.m_xContext, uno::UNO_SET_THROW );
+    mxBreakIter = BreakIterator::create( xCtx );
+    return mxBreakIter;
+}
+
+// Returns the cached character classification service, creating it from
+// the emit context's component context the first time it is requested.
+const Reference< XCharacterClassification >& DrawXmlEmitter::GetCharacterClassification()
+{
+    if ( mxCharClass.is() )
+        return mxCharClass;
+
+    Reference< XComponentContext > xCtx( m_rEmitContext.m_xContext, uno::UNO_SET_THROW );
+    mxCharClass = CharacterClassification::create( xCtx );
+    return mxCharClass;
+}
+
+// Wraps the children of a hyperlink in a <draw:a> or <text:a> tag; which
+// one depends on whether the first child is a DrawElement. Hyperlinks
+// without children produce no output.
+void DrawXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    if( elem.Children.empty() )
+        return;
+
+    const bool bFirstIsDraw =
+        dynamic_cast<DrawElement*>(elem.Children.front().get()) != nullptr;
+    const char* const pLinkTag = bFirstIsDraw ? "draw:a" : "text:a";
+
+    PropertyMap aLinkProps;
+    aLinkProps[ "xlink:type" ] = "simple";
+    aLinkProps[ "xlink:href" ] = elem.URI;
+    aLinkProps[ "office:target-frame-name" ] = "_blank";
+    aLinkProps[ "xlink:show" ] = "new";
+
+    m_rEmitContext.rEmitter.beginTag( pLinkTag, aLinkProps );
+    for( auto itChild = elem.Children.begin();
+         itChild != elem.Children.end() && itChild->get() != &elem;
+         ++itChild )
+    {
+        (*itChild)->visitedBy( *this, itChild );
+    }
+    m_rEmitContext.rEmitter.endTag( pLinkTag );
+}
+
+/*
+ * Emits a TextElement as a <text:span>.
+ *
+ * - The span carries text:style-name when the element has a style id.
+ * - If the character classification reports a right-to-left character,
+ *   the text is bidi-mirrored and its code points are reversed first.
+ * - U+0020 and U+00A0 are written as <text:s text:c="1"/>, U+0009 as
+ *   <text:tab/>, every other code point is written as text.
+ * - Child elements are emitted inside the span, after the text.
+ *
+ * Elements with empty text produce no output.
+ */
+void DrawXmlEmitter::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    if( elem.Text.isEmpty() )
+        return;
+
+    PropertyMap aProps;
+    if( elem.StyleId != -1 )
+    {
+        aProps[ OUString( "text:style-name" ) ] =
+            m_rEmitContext.rStyles.getStyleName( elem.StyleId );
+    }
+
+    OUString str(elem.Text.toString());
+
+    // Check for RTL
+    bool isRTL = false;
+    Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() );
+    if( xCC.is() )
+    {
+        // NOTE(review): the scan starts at index 1, so the first UTF-16 unit
+        // is never classified. Kept unchanged - confirm whether intended.
+        for( sal_Int32 i = 1; i < str.getLength(); ++i )
+        {
+            const css::i18n::DirectionProperty nType =
+                static_cast<css::i18n::DirectionProperty>(xCC->getCharacterDirection( str, i ));
+            if ( nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT ||
+                 nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC ||
+                 nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING ||
+                 nType == css::i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE
+               )
+            {
+                isRTL = true;
+                break; // one RTL character decides; no need to classify the rest
+            }
+        }
+    }
+
+    if (isRTL) // If so, reverse string
+    {
+        // First, produce mirrored-image for each code point which has the Bidi_Mirrored property.
+        str = PDFIProcessor::SubstituteBidiMirrored(str);
+        // Then, reverse the code points in the string, in backward order.
+        str = ::comphelper::string::reverseCodePoints(str);
+    }
+
+    m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
+
+    // <text:s> and <text:tab> use separate attribute maps: sharing one map
+    // made every tab that followed a space inherit text:c="1".
+    PropertyMap aSpaceProps;
+    aSpaceProps[ "text:c" ] = "1";
+    const PropertyMap aTabProps;
+
+    // Walk the string that is actually written (str, not elem.Text), one
+    // code point at a time so surrogate pairs are never split across writes.
+    sal_Int32 nIndex = 0;
+    while( nIndex < str.getLength() )
+    {
+        const sal_Int32 nStart = nIndex;
+        const sal_uInt32 nCodePoint = str.iterateCodePoints( &nIndex );
+        if( nCodePoint == 0x0020 || nCodePoint == 0x00A0 )
+        {
+            m_rEmitContext.rEmitter.beginTag( "text:s", aSpaceProps );
+            m_rEmitContext.rEmitter.endTag( "text:s");
+        }
+        else if( nCodePoint == 0x0009 )
+        {
+            m_rEmitContext.rEmitter.beginTag( "text:tab", aTabProps );
+            m_rEmitContext.rEmitter.endTag( "text:tab");
+        }
+        else
+        {
+            m_rEmitContext.rEmitter.write( str.copy( nStart, nIndex - nStart ) );
+        }
+    }
+
+    auto this_it = elem.Children.begin();
+    while( this_it != elem.Children.end() && this_it->get() != &elem )
+    {
+        (*this_it)->visitedBy( *this, this_it );
+        ++this_it;
+    }
+
+    m_rEmitContext.rEmitter.endTag( "text:span" );
+}
+
+// Emits a paragraph as <text:p>, or <text:h> for headlines, with its
+// style name (if any) and all of its children inside.
+void DrawXmlEmitter::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    PropertyMap aParaProps;
+    if( elem.StyleId != -1 )
+        aParaProps[ "text:style-name" ] = m_rEmitContext.rStyles.getStyleName( elem.StyleId );
+
+    const char* const pParaTag =
+        ( elem.Type == ParagraphElement::Headline ) ? "text:h" : "text:p";
+
+    m_rEmitContext.rEmitter.beginTag( pParaTag, aParaProps );
+    for( auto itChild = elem.Children.begin();
+         itChild != elem.Children.end() && itChild->get() != &elem;
+         ++itChild )
+    {
+        (*itChild)->visitedBy( *this, itChild );
+    }
+    m_rEmitContext.rEmitter.endTag( pParaTag );
+}
+
+// Fills rProps with the frame attributes of rElem: z-index, style name
+// and (for text frames) text style name. If the element was already
+// transformed, position and size are written as svg:x/y/width/height;
+// otherwise the graphics context's transformation is written as a
+// draw:transform matrix.
+void DrawXmlEmitter::fillFrameProps( DrawElement& rElem,
+                                     PropertyMap& rProps,
+                                     const EmitContext& rEmitContext,
+                                     bool bWasTransformed
+                                     )
+{
+    rProps[ "draw:z-index" ] = OUString::number( rElem.ZOrder );
+    rProps[ "draw:style-name" ] = rEmitContext.rStyles.getStyleName( rElem.StyleId );
+
+    if (rElem.IsForText)
+        rProps[ "draw:text-style-name" ] = rEmitContext.rStyles.getStyleName(rElem.TextStyleId);
+
+    const GraphicsContext& rGC =
+        rEmitContext.rProcessor.getGraphicsContext( rElem.GCId );
+
+    if (bWasTransformed)
+    {
+        rProps[ "svg:x" ] = convertPixelToUnitString(rElem.x);
+        rProps[ "svg:y" ] = convertPixelToUnitString(rElem.y);
+        rProps[ "svg:width" ] = convertPixelToUnitString(rElem.w);
+        rProps[ "svg:height" ] = convertPixelToUnitString(rElem.h);
+        return;
+    }
+
+    basegfx::B2DHomMatrix aMatrix(rGC.Transformation);
+
+    if (rElem.MirrorVertical)
+    {
+        // flip vertically around y = 0.5
+        basegfx::B2DHomMatrix aFlip;
+        aFlip.translate(0, -0.5);
+        aFlip.scale(1, -1);
+        aFlip.translate(0, 0.5);
+        aMatrix = aMatrix * aFlip;
+    }
+
+    const double fScale = convPx2mm(100);
+    aMatrix.scale(fScale, fScale);
+
+    // matrix components in the order they appear in the attribute value
+    const OUString sM00 = OUString::number(aMatrix.get(0, 0));
+    const OUString sM10 = OUString::number(aMatrix.get(1, 0));
+    const OUString sM01 = OUString::number(aMatrix.get(0, 1));
+    const OUString sM11 = OUString::number(aMatrix.get(1, 1));
+    const OUString sM02 = OUString::number(aMatrix.get(0, 2));
+    const OUString sM12 = OUString::number(aMatrix.get(1, 2));
+
+    rProps[ "draw:transform" ] =
+        OUString::Concat("matrix(")
+        + sM00 + " " + sM10 + " " + sM01 + " "
+        + sM11 + " " + sM02 + " " + sM12
+        + ")";
+}
+
+// Emits a frame as <draw:frame>. When the first child is a paragraph the
+// children are additionally wrapped in <draw:text-box>. Frames without
+// children produce no output.
+void DrawXmlEmitter::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    if( elem.Children.empty() )
+        return;
+
+    const bool bIsTextBox =
+        dynamic_cast<ParagraphElement*>(elem.Children.front().get()) != nullptr;
+
+    PropertyMap aProps;
+    fillFrameProps( elem, aProps, m_rEmitContext, false );
+
+    m_rEmitContext.rEmitter.beginTag( "draw:frame", aProps );
+    if( bIsTextBox )
+        m_rEmitContext.rEmitter.beginTag( "draw:text-box", PropertyMap() );
+
+    for( auto itChild = elem.Children.begin();
+         itChild != elem.Children.end() && itChild->get() != &elem;
+         ++itChild )
+    {
+        (*itChild)->visitedBy( *this, itChild );
+    }
+
+    if( bIsTextBox )
+        m_rEmitContext.rEmitter.endTag( "draw:text-box" );
+    m_rEmitContext.rEmitter.endTag( "draw:frame" );
+}
+
+// Emits a poly-polygon as an empty <draw:path> element. All points and
+// used control points are first converted from pixels to 100th of mm;
+// the result is exported as svg:d together with a matching svg:viewBox.
+void DrawXmlEmitter::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    elem.updateGeometry();
+
+    /* note:
+     * aw recommends using 100th of mm in all respects since the xml import
+     * (a) is buggy (see issue 37213)
+     * (b) is optimized for 100th of mm and does not scale itself then,
+     *     this does not gain us speed but makes for smaller rounding errors since
+     *     the xml importer coordinates are integer based
+     */
+    const auto toHundredthMm = []( basegfx::B2DPoint& rPt )
+    {
+        rPt.setX( convPx2mmPrec2( rPt.getX() )*100.0 );
+        rPt.setY( convPx2mmPrec2( rPt.getY() )*100.0 );
+    };
+
+    for( sal_uInt32 nPoly = 0; nPoly < elem.PolyPoly.count(); ++nPoly )
+    {
+        basegfx::B2DPolygon aPoly = elem.PolyPoly.getB2DPolygon( nPoly );
+
+        for( sal_uInt32 nPt = 0; nPt < aPoly.count(); ++nPt )
+        {
+            // read everything before the point itself is overwritten
+            basegfx::B2DPoint aPoint = aPoly.getB2DPoint( nPt );
+            basegfx::B2DPoint aPrev = aPoly.getPrevControlPoint( nPt );
+            basegfx::B2DPoint aNext;
+
+            toHundredthMm( aPoint );
+
+            if( aPoly.isPrevControlPointUsed( nPt ) )
+                toHundredthMm( aPrev );
+
+            if( aPoly.isNextControlPointUsed( nPt ) )
+            {
+                aNext = aPoly.getNextControlPoint( nPt );
+                toHundredthMm( aNext );
+            }
+
+            aPoly.setB2DPoint( nPt, aPoint );
+
+            if( aPoly.isPrevControlPointUsed( nPt ) )
+                aPoly.setPrevControlPoint( nPt, aPrev );
+
+            if( aPoly.isNextControlPointUsed( nPt ) )
+                aPoly.setNextControlPoint( nPt, aNext );
+        }
+
+        elem.PolyPoly.setB2DPolygon( nPoly, aPoly );
+    }
+
+    PropertyMap aPathProps;
+    // PDFIProcessor transforms geometrical objects, not images and text
+    // so we need to tell fillFrameProps here that the transformation for
+    // a PolyPolyElement was already applied (aside from translation)
+    fillFrameProps( elem, aPathProps, m_rEmitContext, true );
+
+    const OUString sViewWidth = OUString::number( convPx2mmPrec2(elem.w)*100.0 );
+    const OUString sViewHeight = OUString::number( convPx2mmPrec2(elem.h)*100.0 );
+    aPathProps[ "svg:viewBox" ] = "0 0 " + sViewWidth + " " + sViewHeight;
+    aPathProps[ "svg:d" ] = basegfx::utils::exportToSvgD( elem.PolyPoly, false, true, false );
+
+    m_rEmitContext.rEmitter.beginTag( "draw:path", aPathProps );
+    m_rEmitContext.rEmitter.endTag( "draw:path" );
+}
+
+// Emits an image as <draw:image> containing an <office:binary-data>
+// element whose content is written by the image container as base64.
+void DrawXmlEmitter::visit( ImageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    auto& rOut = m_rEmitContext.rEmitter;
+
+    rOut.beginTag( "draw:image", PropertyMap() );
+    rOut.beginTag( "office:binary-data", PropertyMap() );
+    m_rEmitContext.rImages.writeBase64EncodedStream( elem.Image, m_rEmitContext);
+    rOut.endTag( "office:binary-data" );
+    rOut.endTag( "draw:image" );
+}
+
+// Emits a page as <draw:page> with its master page name, reports the
+// page number to the status indicator (if there is one) and emits all
+// children inside the page tag.
+void DrawXmlEmitter::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    PropertyMap aProps;
+    aProps[ "draw:master-page-name" ] = m_rEmitContext.rStyles.getStyleName( elem.StyleId );
+    m_rEmitContext.rEmitter.beginTag("draw:page", aProps);
+
+    if( m_rEmitContext.xStatusIndicator.is() )
+        m_rEmitContext.xStatusIndicator->setValue( elem.PageNumber );
+
+    for( auto itChild = elem.Children.begin();
+         itChild != elem.Children.end() && itChild->get() != &elem;
+         ++itChild )
+    {
+        (*itChild)->visitedBy( *this, itChild );
+    }
+
+    m_rEmitContext.rEmitter.endTag("draw:page");
+}
+
+// Emits the document body: <office:body> containing <office:drawing> or
+// <office:presentation> (depending on m_bWriteDrawDocument) around all
+// children of the document element.
+void DrawXmlEmitter::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&)
+{
+    const char* const pDocTag = m_bWriteDrawDocument ? "office:drawing" : "office:presentation";
+
+    m_rEmitContext.rEmitter.beginTag( "office:body", PropertyMap() );
+    m_rEmitContext.rEmitter.beginTag( pDocTag, PropertyMap() );
+
+    for( auto itChild = elem.Children.begin();
+         itChild != elem.Children.end() && itChild->get() != &elem;
+         ++itChild )
+    {
+        (*itChild)->visitedBy( *this, itChild );
+    }
+
+    m_rEmitContext.rEmitter.endTag( pDocTag );
+    m_rEmitContext.rEmitter.endTag( "office:body" );
+}
+
+
+void DrawXmlOptimizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{ // Intentionally empty: the optimizer does nothing when visiting a hyperlink element.
+}
+
+void DrawXmlOptimizer::visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator&)
+{ // Intentionally empty: the optimizer does nothing when visiting a single text element.
+}
+
+// A frame needs no optimization of its own; the optimizer is simply
+// applied to the frame's children.
+void DrawXmlOptimizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    elem.applyToChildren( *this );
+}
+
+void DrawXmlOptimizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{ // Intentionally empty: the optimizer does nothing when visiting an image element.
+}
+
+/* Merges two consecutive PolyPolyElements that describe the same path
+ * when this one is a fill (PATH_FILL / PATH_EOFILL) and the following
+ * sibling is a stroke (PATH_STROKE) drawn with an otherwise matching
+ * graphics context. The merged element keeps this element's fill state,
+ * takes over the line attributes of the stroke, adopts the sibling's
+ * children, and the sibling is removed from the parent.
+ */
+void DrawXmlOptimizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& elemIt )
+{
+    if( !elem.Parent )
+        return;
+
+    // locate the sibling that directly follows this element
+    auto& rSiblings = elem.Parent->Children;
+    if( elemIt == rSiblings.end() )
+        return;
+    auto itFollowing = elemIt;
+    ++itFollowing;
+    if( itFollowing == rSiblings.end() )
+        return;
+
+    PolyPolyElement* pStroke = dynamic_cast<PolyPolyElement*>(itFollowing->get());
+    if( !pStroke )
+        return;
+    // TODO(F2): this comparison fails for OOo-generated polygons with beziers.
+    if( pStroke->PolyPoly != elem.PolyPoly )
+        return;
+
+    const GraphicsContext& rStrokeGC =
+        m_rProcessor.getGraphicsContext( pStroke->GCId );
+    const GraphicsContext& rFillGC =
+        m_rProcessor.getGraphicsContext( elem.GCId );
+
+    const bool bSameContext =
+        rFillGC.BlendMode == rStrokeGC.BlendMode &&
+        rFillGC.Flatness == rStrokeGC.Flatness &&
+        rFillGC.Transformation == rStrokeGC.Transformation &&
+        rFillGC.Clip == rStrokeGC.Clip &&
+        rFillGC.FillColor.Red == rStrokeGC.FillColor.Red &&
+        rFillGC.FillColor.Green == rStrokeGC.FillColor.Green &&
+        rFillGC.FillColor.Blue == rStrokeGC.FillColor.Blue &&
+        rFillGC.FillColor.Alpha == rStrokeGC.FillColor.Alpha;
+    if( !bSameContext )
+        return;
+
+    const bool bFillThenStroke =
+        pStroke->Action == PATH_STROKE &&
+        (elem.Action == PATH_FILL || elem.Action == PATH_EOFILL);
+    if( !bFillThenStroke )
+        return;
+
+    // combine: fill state of this element, line state of the stroke
+    GraphicsContext aMergedGC = rFillGC;
+    aMergedGC.LineJoin = rStrokeGC.LineJoin;
+    aMergedGC.LineCap = rStrokeGC.LineCap;
+    aMergedGC.LineWidth = rStrokeGC.LineWidth;
+    aMergedGC.MiterLimit = rStrokeGC.MiterLimit;
+    aMergedGC.DashArray = rStrokeGC.DashArray;
+    aMergedGC.LineColor = rStrokeGC.LineColor;
+    elem.GCId = m_rProcessor.getGCId( aMergedGC );
+
+    elem.Action |= pStroke->Action;
+
+    // take over the stroke's children before the stroke element is destroyed
+    elem.Children.splice( elem.Children.end(), pStroke->Children );
+    rSiblings.erase( itFollowing );
+}
+
+// Merges the paragraph's text elements where possible, then applies the
+// optimizer to the paragraph's (remaining) children.
+void DrawXmlOptimizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    optimizeTextElements( elem );
+    elem.applyToChildren( *this );
+}
+
+void DrawXmlOptimizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{ /* Optimizes one page: reports the page number to the status indicator,
+   * resolves hyperlinks and font styles, sorts the page's children and then
+   * walks them once, grouping text (and small drawing objects that sit on a
+   * text line) into ParagraphElements; a new ParagraphElement is inserted
+   * whenever no current paragraph fits. Finally the optimizer is applied to
+   * the page's children. */
+ if( m_rProcessor.getStatusIndicator().is() )
+ m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber );
+
+ // resolve hyperlinks
+ elem.resolveHyperlinks();
+
+ elem.resolveFontStyles( m_rProcessor ); // underlines and such
+
+ // FIXME: until hyperlinks and font effects are adjusted for
+ // geometrical search handle them before sorting
+ PDFIProcessor::sortElements( &elem );
+
+ // find paragraphs in text
+ ParagraphElement* pCurPara = nullptr;
+ std::list< std::unique_ptr<Element> >::iterator page_element, next_page_element;
+ next_page_element = elem.Children.begin();
+ double fCurLineHeight = 0.0; // average height of text items in current para
+ int nCurLineElements = 0; // number of line contributing elements in current para
+ double line_left = elem.w, line_right = 0.0;
+ double column_width = elem.w*0.75; // estimate text width
+ // TODO: guess columns
+ while( next_page_element != elem.Children.end() )
+ {
+ page_element = next_page_element++; // next_page_element always stays one ahead of the element being processed
+ ParagraphElement* pPagePara = dynamic_cast<ParagraphElement*>(page_element->get());
+ if( pPagePara )
+ {
+ pCurPara = pPagePara;
+ // adjust line height and text items
+ fCurLineHeight = 0.0;
+ nCurLineElements = 0;
+ for( const auto& rxChild : pCurPara->Children )
+ {
+ TextElement* pTestText = rxChild->dynCastAsTextElement();
+ if( pTestText )
+ {
+ fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pTestText->h)/double(nCurLineElements+1); // running average of the text heights
+ nCurLineElements++;
+ }
+ }
+ continue; // an existing paragraph stays in place and becomes the current one
+ }
+
+ HyperlinkElement* pLink = dynamic_cast<HyperlinkElement*>(page_element->get());
+ DrawElement* pDraw = dynamic_cast<DrawElement*>(page_element->get());
+ if( ! pDraw && pLink && ! pLink->Children.empty() )
+ pDraw = dynamic_cast<DrawElement*>(pLink->Children.front().get() ); // a hyperlink is judged by its first child
+ if( pDraw )
+ {
+ // insert small drawing objects as character, else leave them page bound
+
+ bool bInsertToParagraph = false;
+ // first check if this is either inside the paragraph
+ if( pCurPara && pDraw->y < pCurPara->y + pCurPara->h )
+ {
+ if( pDraw->h < fCurLineHeight * 1.5 )
+ {
+ bInsertToParagraph = true;
+ fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pDraw->h)/double(nCurLineElements+1);
+ nCurLineElements++;
+ // mark draw element as character
+ pDraw->isCharacter = true;
+ }
+ }
+ // or perhaps the draw element begins a new paragraph
+ else if( next_page_element != elem.Children.end() )
+ {
+ TextElement* pText = (*next_page_element)->dynCastAsTextElement();
+ if( ! pText )
+ {
+ ParagraphElement* pPara = dynamic_cast<ParagraphElement*>(next_page_element->get());
+ if( pPara && ! pPara->Children.empty() )
+ pText = pPara->Children.front()->dynCastAsTextElement();
+ }
+ if( pText && // check there is a text
+ pDraw->h < pText->h*1.5 && // and it is approx the same height
+ // and either upper or lower edge of pDraw is inside text's vertical range
+ ( ( pDraw->y >= pText->y && pDraw->y <= pText->y+pText->h ) ||
+ ( pDraw->y+pDraw->h >= pText->y && pDraw->y+pDraw->h <= pText->y+pText->h )
+ )
+ )
+ {
+ bInsertToParagraph = true;
+ fCurLineHeight = pDraw->h;
+ nCurLineElements = 1;
+ line_left = pDraw->x;
+ line_right = pDraw->x + pDraw->w;
+ // begin a new paragraph
+ pCurPara = nullptr;
+ // mark draw element as character
+ pDraw->isCharacter = true;
+ }
+ }
+
+ if( ! bInsertToParagraph )
+ {
+ pCurPara = nullptr; // the drawing stays page bound and ends the current paragraph
+ continue;
+ }
+ }
+
+ TextElement* pText = (*page_element)->dynCastAsTextElement();
+ if( ! pText && pLink && ! pLink->Children.empty() )
+ pText = pLink->Children.front()->dynCastAsTextElement();
+ if( pText )
+ {
+ Element* pGeo = pLink ? static_cast<Element*>(pLink) :
+ static_cast<Element*>(pText); // geometry is taken from the hyperlink if there is one, else from the text
+ if( pCurPara )
+ {
+ // there was already a text element, check for a new paragraph
+ if( nCurLineElements > 0 )
+ {
+ // if the new text is significantly distant from the paragraph
+ // begin a new paragraph
+ if( pGeo->y > pCurPara->y + pCurPara->h + fCurLineHeight*0.5 )
+ pCurPara = nullptr; // insert new paragraph
+ else if( pGeo->y > (pCurPara->y+pCurPara->h - fCurLineHeight*0.05) )
+ {
+ // new paragraph if either the last line of the paragraph
+ // was significantly shorter than the paragraph as a whole
+ if( (line_right - line_left) < pCurPara->w*0.75 )
+ pCurPara = nullptr;
+ // or the last line was significantly smaller than the column width
+ else if( (line_right - line_left) < column_width*0.75 )
+ pCurPara = nullptr;
+ }
+ }
+
+
+ }
+
+
+ // update line height/width
+ if( pCurPara )
+ {
+ fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pGeo->h)/double(nCurLineElements+1);
+ nCurLineElements++;
+ if( pGeo->x < line_left )
+ line_left = pGeo->x;
+ if( pGeo->x+pGeo->w > line_right )
+ line_right = pGeo->x+pGeo->w;
+ }
+ else
+ {
+ fCurLineHeight = pGeo->h;
+ nCurLineElements = 1;
+ line_left = pGeo->x;
+ line_right = pGeo->x + pGeo->w;
+ }
+ }
+
+
+ // move element to current paragraph
+ if (! pCurPara ) // new paragraph, insert one
+ {
+ pCurPara = ElementFactory::createParagraphElement( nullptr );
+ // set parent
+ pCurPara->Parent = &elem;
+ //insert new paragraph before current element
+ page_element = elem.Children.insert( page_element, std::unique_ptr<Element>(pCurPara) ); // the page's child list takes ownership of the new paragraph
+ // forward iterator to current element again
+ ++ page_element;
+ // update next_element which is now invalid
+ next_page_element = page_element;
+ ++ next_page_element;
+ }
+ Element* pCurEle = page_element->get();
+ Element::setParent( page_element, pCurPara );
+ OSL_ENSURE( !pText || pCurEle == pText || pCurEle == pLink, "paragraph child list in disorder" );
+ if( pText || pDraw )
+ pCurPara->updateGeometryWith( pCurEle );
+ }
+
+ // process children
+ elem.applyToChildren(*this);
+}
+
+// Returns true when every character of the element's text is ' '
+// (an empty text therefore also yields true).
+static bool isSpaces(TextElement* pTextElem)
+{
+    const sal_Int32 nLength = pTextElem->Text.getLength();
+    sal_Int32 nPos = 0;
+    while (nPos != nLength && pTextElem->Text[nPos] == ' ')
+        ++nPos;
+    return nPos == nLength;
+}
+
+/*
+ * Concatenates consecutive TextElement children of rParent.
+ *
+ * Two neighbouring text elements are merged when they use the same font
+ * (or the second one consists of spaces only) and the same fill color.
+ * The merged element grows its geometry, receives the second element's
+ * text and children, and the second element is erased from rParent.
+ * HyperlinkElement children are optimized recursively.
+ *
+ * When rParent is a ParagraphElement, its bRtl flag is set as soon as a
+ * text element is found to be complex; in an RTL paragraph the appended
+ * text is reversed word by word (see Tdf#152083 below).
+ *
+ * The parent type is determined with dynamic_cast only. The former extra
+ * test, strspn("ParagraphElement", typeid(rParent).name()), depended on
+ * the implementation-defined spelling of the type name and added nothing
+ * to the cast.
+ */
+void DrawXmlOptimizer::optimizeTextElements(Element& rParent)
+{
+    if( rParent.Children.empty() ) // this should not happen
+    {
+        OSL_FAIL( "empty paragraph optimized" );
+        return;
+    }
+
+    // nullptr when rParent is not a paragraph (e.g. a hyperlink)
+    ParagraphElement* const pPara = dynamic_cast<ParagraphElement*>(&rParent);
+
+    // concatenate child elements with same font id
+    auto next = rParent.Children.begin();
+    auto it = next++;
+
+    while( next != rParent.Children.end() )
+    {
+        bool bConcat = false;
+        TextElement* pCur = (*it)->dynCastAsTextElement();
+
+        if( pCur )
+        {
+            TextElement* pNext = (*next)->dynCastAsTextElement();
+            if (pPara && isComplex(GetBreakIterator(), pCur))
+                pPara->bRtl = true;
+            if( pNext )
+            {
+                const GraphicsContext& rCurGC = m_rProcessor.getGraphicsContext( pCur->GCId );
+                const GraphicsContext& rNextGC = m_rProcessor.getGraphicsContext( pNext->GCId );
+
+                // line and space optimization; works only in strictly horizontal mode
+
+                // concatenate consecutive text elements unless there is a
+                // font or text color change, leave a new span in that case
+                if( (pCur->FontId == pNext->FontId || isSpaces(pNext)) &&
+                    rCurGC.FillColor.Red == rNextGC.FillColor.Red &&
+                    rCurGC.FillColor.Green == rNextGC.FillColor.Green &&
+                    rCurGC.FillColor.Blue == rNextGC.FillColor.Blue &&
+                    rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha
+                    )
+                {
+                    pCur->updateGeometryWith( pNext );
+                    if (pPara && pPara->bRtl)
+                    {
+                        // Tdf#152083: If RTL, reverse the text in pNext so that its correct order is
+                        // restored when the combined text is reversed in DrawXmlEmitter::visit.
+                        const OUString str = pNext->Text.toString();
+                        OUString tempStr; // characters collected since the last space
+                        for (sal_Int32 i=0; i < str.getLength(); i++)
+                        {
+                            if (str[i] == u' ')
+                            {   // Space char (e.g. the space as in " م") needs special treatment.
+                                // First, append the space char to pCur.
+                                pCur->Text.append(OUStringChar(str[i]));
+                                // Then, append the collected run (if any) in reversed order.
+                                if (!tempStr.isEmpty())
+                                {
+                                    pCur->Text.append(::comphelper::string::reverseCodePoints(tempStr));
+                                    tempStr.clear();
+                                }
+                            }
+                            else
+                            {
+                                tempStr += OUStringChar(str[i]);
+                            }
+                        }
+                        // Do the last append; nothing is pending if the text ended with a space.
+                        if (!tempStr.isEmpty())
+                            pCur->Text.append(::comphelper::string::reverseCodePoints(tempStr));
+                    }
+                    else
+                    {
+                        // append text to current element directly without reverse
+                        pCur->Text.append( pNext->Text );
+                    }
+
+                    // the merged text may have become complex
+                    if (pPara && isComplex(GetBreakIterator(), pCur))
+                        pPara->bRtl = true;
+                    // append eventual children to current element
+                    // and clear children (else the children just
+                    // appended to pCur would be destroyed)
+                    pCur->Children.splice( pCur->Children.end(), pNext->Children );
+                    // get rid of the now useless element
+                    rParent.Children.erase( next );
+                    bConcat = true;
+                }
+            }
+        }
+        else if( dynamic_cast<HyperlinkElement*>(it->get()) )
+            optimizeTextElements( **it );
+        // after a merge 'next' was erased: stay on 'it' and re-derive 'next' from it
+        if ( bConcat )
+            next = it;
+        else
+            ++it;
+        ++next;
+    }
+}
+
+// The document itself needs no optimization; the optimizer is applied
+// to its children.
+void DrawXmlOptimizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&)
+{
+    elem.applyToChildren( *this );
+}
+
+
+// Builds the graphic style of a poly-polygon from its graphics context
+// (stroke, dash, line color/width/join/cap, fill color and opacity),
+// registers it with the style container and stores the resulting id in
+// elem.StyleId.
+void DrawXmlFinalizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    // xxx TODO copied from DrawElement
+    const GraphicsContext& rGC = m_rProcessor.getGraphicsContext(elem.GCId );
+
+    PropertyMap aStyleProps;
+    aStyleProps[ "style:family" ] = "graphic";
+    aStyleProps[ "style:parent-style-name" ] = "standard";
+    // generate standard graphic style if necessary
+    m_rStyleContainer.getStandardStyleId( "graphic" );
+
+    PropertyMap aGraphicProps;
+
+    // --- stroke ---
+    if (!(elem.Action & PATH_STROKE))
+    {
+        aGraphicProps[ "draw:stroke" ] = "none";
+    }
+    else
+    {
+        const double fScale = GetAverageTransformationScale(rGC.Transformation);
+        if (rGC.DashArray.size() >= 2)
+        {
+            PropertyMap aDashProps;
+            FillDashStyleProps(aDashProps, rGC.DashArray, fScale);
+            StyleContainer::Style aDashStyle("draw:stroke-dash"_ostr, std::move(aDashProps));
+
+            aGraphicProps[ "draw:stroke" ] = "dash";
+            aGraphicProps[ "draw:stroke-dash" ] =
+                m_rStyleContainer.getStyleName(
+                    m_rStyleContainer.getStyleId(aDashStyle));
+        }
+        else
+        {
+            aGraphicProps[ "draw:stroke" ] = "solid";
+        }
+
+        aGraphicProps[ "svg:stroke-color" ] = getColorString(rGC.LineColor);
+        if (rGC.LineColor.Alpha != 1.0)
+            aGraphicProps["svg:stroke-opacity"] = getPercentString(rGC.LineColor.Alpha * 100.0);
+        aGraphicProps[ "svg:stroke-width" ] = convertPixelToUnitString(rGC.LineWidth * fScale);
+        aGraphicProps[ "draw:stroke-linejoin" ] = rGC.GetLineJoinString();
+        aGraphicProps[ "svg:stroke-linecap" ] = rGC.GetLineCapString();
+    }
+
+    // --- fill ---
+    // TODO(F1): check whether stuff could be emulated by gradient/bitmap/hatch
+    if( !(elem.Action & (PATH_FILL | PATH_EOFILL)) )
+    {
+        aGraphicProps[ "draw:fill" ] = "none";
+    }
+    else
+    {
+        aGraphicProps[ "draw:fill" ] = "solid";
+        aGraphicProps[ "draw:fill-color" ] = getColorString(rGC.FillColor);
+        if (rGC.FillColor.Alpha != 1.0)
+            aGraphicProps["draw:opacity"] = getPercentString(rGC.FillColor.Alpha * 100.0);
+    }
+
+    StyleContainer::Style aGraphicStyle( "style:style"_ostr, std::move(aStyleProps) );
+    StyleContainer::Style aGraphicSubStyle( "style:graphic-properties"_ostr, std::move(aGraphicProps) );
+    aGraphicStyle.SubStyles.push_back( &aGraphicSubStyle );
+
+    elem.StyleId = m_rStyleContainer.getStyleId( aGraphicStyle );
+}
+
+void DrawXmlFinalizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{ // Intentionally empty: the finalizer creates no style when visiting a hyperlink element.
+}
+
+// Writes fontSize, rescaled by 72 / PDFI_OUTDEV_RESOLUTION and suffixed
+// with "pt", into the western, asian and complex font-size properties.
+static void SetFontsizeProperties(PropertyMap& props, double fontSize)
+{
+    const double fPoints = fontSize * 72 / PDFI_OUTDEV_RESOLUTION;
+    const OUString sPoints = OUString::number(fPoints) + "pt";
+
+    props["fo:font-size"] = sPoints;
+    props["style:font-size-asian"] = sPoints;
+    props["style:font-size-complex"] = sPoints;
+}
+
+// Builds the text style of a text element from its font attributes and
+// graphics context (family, weight, italic, underline, outline, size,
+// color, horizontal text scale), registers it with the style container
+// and stores the resulting id in elem.StyleId.
+void DrawXmlFinalizer::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    const FontAttributes& rFont = m_rProcessor.getFont( elem.FontId );
+
+    PropertyMap aStyleProps;
+    aStyleProps[ "style:family" ] = "text";
+
+    PropertyMap aTextProps;
+
+    // family name
+    // TODO: tdf#143095: use system font name rather than PSName
+    SAL_INFO("sdext.pdfimport", "The font used in xml is: " << rFont.familyName);
+    aTextProps[ "fo:font-family" ] = rFont.familyName;
+    aTextProps[ "style:font-family-asian" ] = rFont.familyName;
+    aTextProps[ "style:font-family-complex" ] = rFont.familyName;
+
+    // bold
+    aTextProps[ "fo:font-weight" ] = rFont.fontWeight;
+    aTextProps[ "style:font-weight-asian" ] = rFont.fontWeight;
+    aTextProps[ "style:font-weight-complex" ] = rFont.fontWeight;
+
+    // italic
+    if( rFont.isItalic )
+    {
+        aTextProps[ "fo:font-style" ] = "italic";
+        aTextProps[ "style:font-style-asian" ] = "italic";
+        aTextProps[ "style:font-style-complex" ] = "italic";
+    }
+
+    // underline
+    if( rFont.isUnderline )
+    {
+        aTextProps[ "style:text-underline-style" ] = "solid";
+        aTextProps[ "style:text-underline-width" ] = "auto";
+        aTextProps[ "style:text-underline-color" ] = "font-color";
+    }
+
+    // outline
+    if( rFont.isOutline )
+        aTextProps[ "style:text-outline" ] = "true";
+
+    // size
+    SetFontsizeProperties(aTextProps, rFont.size);
+
+    // color: outlined text is colored with the line color, other text with the fill color
+    const GraphicsContext& rGC = m_rProcessor.getGraphicsContext( elem.GCId );
+    aTextProps[ "fo:color" ] = getColorString( rFont.isOutline ? rGC.LineColor : rGC.FillColor );
+
+    // scale: ratio of horizontal to vertical scaling, in percent
+    double fRotation, fShear;
+    basegfx::B2DTuple aScaling, aOffset;
+    rGC.Transformation.decompose(aScaling, aOffset, fRotation, fShear);
+    const double fTextScale = 100 * aScaling.getX() / aScaling.getY();
+    const bool bNarrower = (fTextScale >= 1) && (fTextScale <= 99);
+    const bool bWider = (fTextScale >= 101) && (fTextScale <= 999);
+    if (bNarrower || bWider)
+        aTextProps[ "style:text-scale" ] = getPercentString(fTextScale);
+
+    StyleContainer::Style aTextStyle( "style:style"_ostr, std::move(aStyleProps) );
+    StyleContainer::Style aTextSubStyle( "style:text-properties"_ostr, std::move(aTextProps) );
+    aTextStyle.SubStyles.push_back( &aTextSubStyle );
+    elem.StyleId = m_rStyleContainer.getStyleId( aTextStyle );
+}
+
+// Builds the paragraph style (start-aligned, writing mode chosen by
+// elem.bRtl), stores its id in elem.StyleId and then finalizes the
+// paragraph's children.
+void DrawXmlFinalizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    PropertyMap aStyleProps;
+    aStyleProps[ "style:family" ] = "paragraph";
+    // generate standard paragraph style if necessary
+    m_rStyleContainer.getStandardStyleId( "paragraph" );
+
+    PropertyMap aParagraphProps;
+    aParagraphProps[ "fo:text-align"] = "start";
+    aParagraphProps[ "style:writing-mode"] =
+        elem.bRtl ? OUString( "rl-tb" ) : OUString( "lr-tb" );
+
+    StyleContainer::Style aParaStyle( "style:style"_ostr, std::move(aStyleProps) );
+    StyleContainer::Style aParaSubStyle( "style:paragraph-properties"_ostr, std::move(aParagraphProps) );
+    aParaStyle.SubStyles.push_back( &aParaSubStyle );
+
+    elem.StyleId = m_rStyleContainer.getStyleId( aParaStyle );
+
+    elem.applyToChildren( *this );
+}
+
+// Builds the graphic style of a frame (no stroke, no fill, auto-grow,
+// zero padding and minimum size) and stores its id in elem.StyleId.
+// Text frames additionally get a paragraph-family style carrying the
+// frame's font size, stored in elem.TextStyleId. Afterwards the frame's
+// children are finalized.
+void DrawXmlFinalizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&)
+{
+    PropertyMap aFrameStyleProps;
+    aFrameStyleProps[ "style:family" ] = "graphic";
+    aFrameStyleProps[ "style:parent-style-name" ] = "standard";
+    // generate standard graphic style if necessary
+    m_rStyleContainer.getStandardStyleId( "graphic" );
+
+    PropertyMap aGraphicProps;
+    aGraphicProps[ "draw:stroke" ] = "none";
+    aGraphicProps[ "draw:fill" ] = "none";
+    aGraphicProps[ "draw:auto-grow-height" ] = "true";
+    aGraphicProps[ "draw:auto-grow-width" ] = "true";
+    aGraphicProps[ "draw:textarea-horizontal-align" ] = "left";
+    aGraphicProps[ "draw:textarea-vertical-align" ] = "top";
+    aGraphicProps[ "fo:min-height"] = "0cm";
+    aGraphicProps[ "fo:min-width"] = "0cm";
+    aGraphicProps[ "fo:padding-top" ] = "0cm";
+    aGraphicProps[ "fo:padding-left" ] = "0cm";
+    aGraphicProps[ "fo:padding-right" ] = "0cm";
+    aGraphicProps[ "fo:padding-bottom" ] = "0cm";
+
+    {
+        StyleContainer::Style aFrameStyle( "style:style"_ostr, std::move(aFrameStyleProps) );
+        StyleContainer::Style aFrameSubStyle( "style:graphic-properties"_ostr, std::move(aGraphicProps) );
+        aFrameStyle.SubStyles.push_back( &aFrameSubStyle );
+
+        elem.StyleId = m_rStyleContainer.getStyleId( aFrameStyle );
+    }
+
+    if (elem.IsForText)
+    {
+        PropertyMap aTextStyleProps;
+        aTextStyleProps["style:family"] = "paragraph";
+
+        PropertyMap aFontSizeProps;
+        SetFontsizeProperties(aFontSizeProps, elem.FontSize);
+
+        StyleContainer::Style aTextStyle( "style:style"_ostr, std::move(aTextStyleProps) );
+        StyleContainer::Style aTextSubStyle( "style:text-properties"_ostr, std::move(aFontSizeProps) );
+        aTextStyle.SubStyles.push_back( &aTextSubStyle );
+        elem.TextStyleId = m_rStyleContainer.getStyleId( aTextStyle );
+    }
+
+    elem.applyToChildren( *this );
+}
+
+/// Visitor hook for image elements. The body is empty: this visitor computes
+/// nothing for an ImageElement and does not descend into it.
+void DrawXmlFinalizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+}
+
+/** Derives page margins for @p elem, registers its page-layout and
+ *  master-page styles and then visits the page's children.
+ *
+ *  The status indicator (if any) is advanced to elem.PageNumber. The margin
+ *  members of @p elem are first set from the bounding box of all direct
+ *  children (in pixels), converted to mm, rounded down, sanitized and written
+ *  back in pixels. elem.StyleId receives the id of the master-page style.
+ *
+ *  NOTE(review): RightMargin/BottomMargin are taken from the largest
+ *  x+w / y+h of the children, i.e. the far edge of the content rather than
+ *  the distance to the page edge - confirm this is intended.
+ */
+void DrawXmlFinalizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    if( m_rProcessor.getStatusIndicator().is() )
+        m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber );
+
+    // transform from pixel to mm
+    double page_width = convPx2mm( elem.w ), page_height = convPx2mm( elem.h );
+
+    // calculate page margins out of the bounding box of the direct children
+    elem.TopMargin = elem.h;
+    elem.BottomMargin = 0;
+    elem.LeftMargin = elem.w;
+    elem.RightMargin = 0;
+
+    for( const auto& rxChild : elem.Children )
+    {
+        if( rxChild->x < elem.LeftMargin )
+            elem.LeftMargin = rxChild->x;
+        if( rxChild->y < elem.TopMargin )
+            elem.TopMargin = rxChild->y;
+        if( rxChild->x + rxChild->w > elem.RightMargin )
+            elem.RightMargin = (rxChild->x + rxChild->w);
+        if( rxChild->y + rxChild->h > elem.BottomMargin )
+            elem.BottomMargin = (rxChild->y + rxChild->h);
+    }
+
+    // transform margins to mm
+    double left_margin = convPx2mm( elem.LeftMargin );
+    double right_margin = convPx2mm( elem.RightMargin );
+    double top_margin = convPx2mm( elem.TopMargin );
+    double bottom_margin = convPx2mm( elem.BottomMargin );
+
+    // round left/top margin down to whole mm
+    left_margin = rtl_math_round( left_margin, 0, rtl_math_RoundingMode_Floor );
+    top_margin = rtl_math_round( top_margin, 0, rtl_math_RoundingMode_Floor );
+    // round (fuzzy) right/bottom margin down: to whole cm (tens of mm) from
+    // 10 mm upwards, to whole mm below that
+    right_margin = rtl_math_round( right_margin, right_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor );
+    bottom_margin = rtl_math_round( bottom_margin, bottom_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor );
+
+    // set reasonable default in case of way too large margins
+    // e.g. no paragraph case
+    if( left_margin > page_width/2.0 - 10 )
+        left_margin = 10;
+    if( right_margin > page_width/2.0 - 10 )
+        right_margin = 10;
+    if( top_margin > page_height/2.0 - 10 )
+        top_margin = 10;
+    if( bottom_margin > page_height/2.0 - 10 )
+        bottom_margin = 10;
+
+    // catch the weird cases
+    if( left_margin < 0 )
+        left_margin = 0;
+    if( right_margin < 0 )
+        right_margin = 0;
+    if( top_margin < 0 )
+        top_margin = 0;
+    if( bottom_margin < 0 )
+        bottom_margin = 0;
+
+    // widely differing margins are unlikely to be correct
+    if( right_margin > left_margin*1.5 )
+        right_margin = left_margin;
+
+    elem.LeftMargin = convmm2Px( left_margin );
+    elem.RightMargin = convmm2Px( right_margin );
+    elem.TopMargin = convmm2Px( top_margin );
+    elem.BottomMargin = convmm2Px( bottom_margin );
+
+    // create the page layout style
+    PropertyMap aPageProps;
+    PropertyMap aPageLayoutProps;
+    aPageLayoutProps[ "fo:margin-top" ] = unitMMString( top_margin );
+    aPageLayoutProps[ "fo:margin-bottom" ] = unitMMString( bottom_margin );
+    aPageLayoutProps[ "fo:margin-left" ] = unitMMString( left_margin );
+    aPageLayoutProps[ "fo:margin-right" ] = unitMMString( right_margin );
+    aPageLayoutProps[ "fo:page-width" ] = unitMMString( page_width );
+    aPageLayoutProps[ "fo:page-height" ] = unitMMString( page_height );
+    aPageLayoutProps[ "style:print-orientation" ]= elem.w < elem.h ? std::u16string_view(u"portrait") : std::u16string_view(u"landscape");
+    aPageLayoutProps[ "style:writing-mode" ]= "lr-tb";
+
+    StyleContainer::Style aStyle( "style:page-layout"_ostr, std::move(aPageProps));
+    StyleContainer::Style aSubStyle( "style:page-layout-properties"_ostr, std::move(aPageLayoutProps));
+    aStyle.SubStyles.push_back(&aSubStyle);
+    sal_Int32 nPageStyle = m_rStyleContainer.impl_getStyleId( aStyle, false );
+
+    // create master page; use a fresh map instead of writing into the
+    // moved-from aPageProps, whose content is unspecified after the move
+    OUString aMasterPageLayoutName = m_rStyleContainer.getStyleName( nPageStyle );
+    PropertyMap aMasterPageProps;
+    aMasterPageProps[ "style:page-layout-name" ] = aMasterPageLayoutName;
+
+    StyleContainer::Style aMPStyle( "style:master-page"_ostr, std::move(aMasterPageProps));
+
+    elem.StyleId = m_rStyleContainer.impl_getStyleId( aMPStyle,false );
+
+    // create styles for children
+    elem.applyToChildren(*this);
+}
+
+/// Visitor hook for the document element: does no work of its own and only
+/// forwards this visitor to every child of @p elem.
+void DrawXmlFinalizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+ elem.applyToChildren(*this);
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/drawtreevisiting.hxx b/sdext/source/pdfimport/tree/drawtreevisiting.hxx
new file mode 100644
index 0000000000..e3ea8e537f
--- /dev/null
+++ b/sdext/source/pdfimport/tree/drawtreevisiting.hxx
@@ -0,0 +1,115 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_DRAWTREEVISITING_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_DRAWTREEVISITING_HXX
+
+#include <treevisiting.hxx>
+
+#include <com/sun/star/i18n/XBreakIterator.hpp>
+#include <com/sun/star/i18n/XCharacterClassification.hpp>
+
+namespace pdfi
+{
+ struct DrawElement;
+
+ /** ElementTreeVisitor that holds a reference to the PDFIProcessor and a
+  *  break iterator reference; only declarations live here, the visit()
+  *  overloads are defined outside this header.
+  *  NOTE(review): judging by the name this is the optimization pass for
+  *  Draw/Impress output - confirm against the implementation file.
+  */
+ class DrawXmlOptimizer : public ElementTreeVisitor
+ {
+ private:
+ PDFIProcessor& m_rProcessor;
+ // not set by the constructor; presumably filled on demand by GetBreakIterator() - verify
+ css::uno::Reference< css::i18n::XBreakIterator > mxBreakIter;
+
+ void optimizeTextElements(Element& rParent);
+
+ public:
+ const css::uno::Reference< css::i18n::XBreakIterator >& GetBreakIterator();
+ /// Stores @p rProcessor by reference; the processor must outlive this visitor.
+ explicit DrawXmlOptimizer(PDFIProcessor& rProcessor) :
+ m_rProcessor(rProcessor)
+ {}
+
+ // one overload per element type of the tree
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ /** ElementTreeVisitor that works on a StyleContainer and a PDFIProcessor,
+  *  both held by reference. Its visit() overloads are defined in
+  *  drawtreevisiting.cxx, where they register styles in the container and
+  *  store the resulting ids in the visited elements.
+  */
+ class DrawXmlFinalizer : public ElementTreeVisitor
+ {
+ private:
+ StyleContainer& m_rStyleContainer;
+ PDFIProcessor& m_rProcessor;
+
+ public:
+ /// Stores both arguments by reference; they must outlive this visitor.
+ explicit DrawXmlFinalizer(StyleContainer& rStyleContainer,
+ PDFIProcessor& rProcessor) :
+ m_rStyleContainer(rStyleContainer),
+ m_rProcessor(rProcessor)
+ {}
+
+ // one overload per element type of the tree
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ /** ElementTreeVisitor bound to an EmitContext and a target document type
+  *  (Draw or Impress, see DocType). Only declarations live here.
+  *  NOTE(review): judging by the name this pass writes the XML for the
+  *  element tree - confirm against the implementation file.
+  */
+ class DrawXmlEmitter : public ElementTreeVisitor
+ {
+ private:
+ // not set by the constructor; presumably filled on demand by GetCharacterClassification() - verify
+ css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass;
+
+ EmitContext& m_rEmitContext ;
+ /// writes Impress doc when false
+ const bool m_bWriteDrawDocument;
+
+ static void fillFrameProps( DrawElement& rElem,
+ PropertyMap& rProps,
+ const EmitContext& rEmitContext,
+ bool bWasTransformed
+ );
+
+ public:
+ const css::uno::Reference< css::i18n::XCharacterClassification >& GetCharacterClassification();
+ /// Kind of document to write; DRAW_DOC sets m_bWriteDrawDocument to true.
+ enum DocType{ DRAW_DOC, IMPRESS_DOC };
+ /// Stores @p rEmitContext by reference; it must outlive this visitor.
+ explicit DrawXmlEmitter(EmitContext& rEmitContext, DocType eDocType) :
+ m_rEmitContext(rEmitContext),
+ m_bWriteDrawDocument(eDocType==DRAW_DOC)
+ {}
+
+ // one overload per element type of the tree
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/genericelements.cxx b/sdext/source/pdfimport/tree/genericelements.cxx
new file mode 100644
index 0000000000..1d11cd0d91
--- /dev/null
+++ b/sdext/source/pdfimport/tree/genericelements.cxx
@@ -0,0 +1,449 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <genericelements.hxx>
+#include <pdfiprocessor.hxx>
+#include <pdfihelper.hxx>
+
+#include <com/sun/star/i18n/BreakIterator.hpp>
+#include <com/sun/star/i18n/ScriptType.hpp>
+#include <basegfx/polygon/b2dpolypolygontools.hxx>
+#include <basegfx/range/b2drange.hxx>
+#include <sal/log.hxx>
+
+namespace pdfi
+{
+
+/// Out-of-line destructor; nothing beyond member destruction is required.
+Element::~Element() = default;
+
+/// Lets every child accept @p rVisitor, handing each child the iterator that
+/// designates it inside this element's Children list.
+void Element::applyToChildren( ElementTreeVisitor& rVisitor )
+{
+    auto aChildPos = Children.begin();
+    while( aChildPos != Children.end() )
+    {
+        (*aChildPos)->visitedBy( rVisitor, aChildPos );
+        ++aChildPos;
+    }
+}
+
+/** Moves the element designated by @p el to the end of @p pNewParent's
+ *  children and updates its Parent pointer.
+ *
+ *  Does nothing when @p pNewParent is null. The element's current Parent is
+ *  dereferenced, so it has to be set.
+ */
+void Element::setParent( std::list<std::unique_ptr<Element>>::iterator const & el, Element* pNewParent )
+{
+    if( !pNewParent )
+        return;
+
+    std::list<std::unique_ptr<Element>>& rOldSiblings = (*el)->Parent->Children;
+    pNewParent->Children.splice( pNewParent->Children.end(), rOldSiblings, el );
+    (*el)->Parent = pNewParent;
+}
+
+/** Grows this element's rectangle so that it also covers @p pMergeFrom.
+ *
+ *  An element whose width and height are both zero simply adopts the other
+ *  element's rectangle; otherwise each edge is extended where necessary.
+ */
+void Element::updateGeometryWith( const Element* pMergeFrom )
+{
+    const Element& rOther = *pMergeFrom;
+
+    if( w == 0 && h == 0 )
+    {
+        // no extent yet: take over the other rectangle unchanged
+        x = rOther.x;
+        y = rOther.y;
+        w = rOther.w;
+        h = rOther.h;
+        return;
+    }
+
+    // horizontal extent: left edge first, then right edge
+    if( rOther.x < x )
+    {
+        w += x - rOther.x;
+        x = rOther.x;
+    }
+    if( rOther.x+rOther.w > x+w )
+        w = rOther.w+rOther.x - x;
+
+    // vertical extent: top edge first, then bottom edge
+    if( rOther.y < y )
+    {
+        h += y - rOther.y;
+        y = rOther.y;
+    }
+    if( rOther.y+rOther.h > y+h )
+        h = rOther.h+rOther.y - y;
+}
+
+
+#if OSL_DEBUG_LEVEL > 0
+#include <typeinfo>
+/// Debug-only dump of this subtree to the "sdext" log area: an opening line
+/// with the dynamic type name, the object address and the rectangle
+/// (x,y)+(wxh), then all children one level deeper, then a closing line.
+/// Each line is prefixed with nLevel blanks.
+void Element::emitStructure( int nLevel)
+{
+ SAL_INFO( "sdext", std::string(nLevel, ' ') << "<" << typeid( *this ).name() << " " << this << "> ("
+ << std::setprecision(1) << x << "," << y << ")+(" << w << "x" << h << ")" );
+ for (auto const& child : Children)
+ child->emitStructure(nLevel+1);
+ SAL_INFO( "sdext", std::string(nLevel, ' ') << "</" << typeid( *this ).name() << ">" );
+}
+#endif
+
+/// A ListElement is never reported to the visitor itself; the visitor is
+/// only forwarded to the children. The iterator argument is unused.
+void ListElement::visitedBy( ElementTreeVisitor& visitor, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+ // this is only an inner node
+ applyToChildren(visitor);
+}
+
+/// Dispatches to the visitor's HyperlinkElement overload, passing on the
+/// iterator received from the caller.
+void HyperlinkElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt )
+{
+ rVisitor.visit(*this,rParentIt);
+}
+
+/// Dispatches to the visitor's TextElement overload, passing on the iterator
+/// received from the caller.
+void TextElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt )
+{
+ rVisitor.visit(*this,rParentIt);
+}
+
+/// Dispatches to the visitor's FrameElement overload, passing on the iterator
+/// received from the caller.
+void FrameElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt )
+{
+ rVisitor.visit(*this,rParentIt);
+}
+
+/// Dispatches to the visitor's ImageElement overload, passing on the iterator
+/// received from the caller.
+void ImageElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt)
+{
+ rVisitor.visit( *this, rParentIt);
+}
+
+/// Creates a poly-polygon element: parent and graphics context id go to the
+/// DrawElement base, the polygon is copied into PolyPoly and @p nAction
+/// (a combination of the PATH_* flags tested elsewhere in this file) into
+/// Action. The geometry is not computed here; see updateGeometry().
+PolyPolyElement::PolyPolyElement( Element* pParent,
+ sal_Int32 nGCId,
+ const basegfx::B2DPolyPolygon& rPolyPoly,
+ sal_Int8 nAction )
+ : DrawElement( pParent, nGCId ),
+ PolyPoly( rPolyPoly ),
+ Action( nAction )
+{
+}
+
+/** Recomputes x, y, w and h from the bounding range of PolyPoly.
+ *
+ *  Polygons that use control points are subdivided first so that the range
+ *  is taken from the subdivided outline. Paths that are filled are closed
+ *  afterwards.
+ */
+void PolyPolyElement::updateGeometry()
+{
+    const basegfx::B2DRange aBounds = PolyPoly.areControlPointsUsed()
+        ? basegfx::utils::getRange( basegfx::utils::adaptiveSubdivideByAngle( PolyPoly ) )
+        : basegfx::utils::getRange( PolyPoly );
+
+    x = aBounds.getMinX();
+    y = aBounds.getMinY();
+    w = aBounds.getWidth();
+    h = aBounds.getHeight();
+
+    // fdo#32330 - non-closed paths will not show up filled in LibO
+    const bool bFilled = (Action & (PATH_FILL | PATH_EOFILL)) != 0;
+    if( bFilled )
+        PolyPoly.setClosed(true);
+}
+
+/// Dispatches to the visitor's PolyPolyElement overload, passing on the
+/// iterator received from the caller.
+void PolyPolyElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt)
+{
+ rVisitor.visit( *this, rParentIt);
+}
+
+#if OSL_DEBUG_LEVEL > 0
+/// Debug-only dump to the "sdext" log area: like Element::emitStructure, but
+/// instead of the rectangle it logs one line per polygon listing all of its
+/// points as "(x,y)" pairs, followed by the children and the closing line.
+void PolyPolyElement::emitStructure( int nLevel)
+{
+ SAL_INFO( "sdext", std::string(nLevel, ' ') << "<" << typeid( *this ).name() << " " << this << ">" );
+ SAL_INFO( "sdext", "path=" );
+ int nPoly = PolyPoly.count();
+ for( int i = 0; i < nPoly; i++ )
+ {
+ // collect the points of polygon i into a single log line
+ OUStringBuffer buff;
+ basegfx::B2DPolygon aPoly = PolyPoly.getB2DPolygon( i );
+ int nPoints = aPoly.count();
+ for( int n = 0; n < nPoints; n++ )
+ {
+ basegfx::B2DPoint aPoint = aPoly.getB2DPoint( n );
+ buff.append( " (" + OUString::number(aPoint.getX()) + "," + OUString::number(aPoint.getY()) + ")");
+ }
+ SAL_INFO( "sdext", " " << buff.makeStringAndClear() );
+ }
+ for (auto const& child : Children)
+ child->emitStructure( nLevel+1 );
+ SAL_INFO( "sdext", std::string(nLevel, ' ') << "</" << typeid( *this ).name() << ">");
+}
+#endif
+
+/// Dispatches to the visitor's ParagraphElement overload, passing on the
+/// iterator received from the caller.
+void ParagraphElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt )
+{
+ rVisitor.visit(*this,rParentIt);
+}
+
+/** Tells whether all text of this paragraph sits on one line.
+ *
+ *  The answer is false when the paragraph contains a sub-paragraph, when a
+ *  text child is taller than 1.5 times its font size, when a text child does
+ *  not overlap vertically with the first text child, or when there is no
+ *  text child at all.
+ */
+bool ParagraphElement::isSingleLined( PDFIProcessor const & rProc ) const
+{
+    // first text child encountered; all later ones are compared against it
+    TextElement* pReference = nullptr;
+
+    for( auto& rxChild : Children )
+    {
+        // a paragraph containing subparagraphs cannot be single lined
+        if( dynamic_cast< ParagraphElement* >(rxChild.get()) != nullptr )
+            return false;
+
+        TextElement* pCurrent = rxChild->dynCastAsTextElement();
+        if( !pCurrent )
+            continue;
+
+        const FontAttributes& rFont = rProc.getFont( pCurrent->FontId );
+        if( pCurrent->h > rFont.size*1.5 )
+            return false;
+
+        if( !pReference )
+        {
+            pReference = pCurrent;
+            continue;
+        }
+
+        // vertically disjoint from the reference text: another line
+        if( pCurrent->y > pReference->y+pReference->h ||
+            pReference->y > pCurrent->y+pCurrent->h )
+            return false;
+    }
+
+    // a paragraph without a single text is not considered single lined
+    return pReference != nullptr;
+}
+
+/** Returns the largest line height found among the children.
+ *
+ *  Sub-paragraphs contribute their own getLineHeight(); text children
+ *  contribute their height, or the font size when the height exceeds
+ *  1.5 times the font size. Other children are ignored; the result is 0
+ *  when nothing contributes.
+ */
+double ParagraphElement::getLineHeight( PDFIProcessor& rProc ) const
+{
+    double line_h = 0;
+    for( auto& rxChild : Children )
+    {
+        double fCandidate;
+        if( ParagraphElement* pSubPara = dynamic_cast< ParagraphElement* >(rxChild.get()) )
+        {
+            fCandidate = pSubPara->getLineHeight( rProc );
+        }
+        else if( TextElement* pText = rxChild->dynCastAsTextElement() )
+        {
+            const FontAttributes& rFont = rProc.getFont( pText->FontId );
+            // overly tall text elements count with their font size only
+            fCandidate = pText->h;
+            if( pText->h > rFont.size*1.5 )
+                fCandidate = rFont.size;
+        }
+        else
+            continue;
+
+        if( fCandidate > line_h )
+            line_h = fCandidate;
+    }
+    return line_h;
+}
+
+/// Returns the first direct child that is a TextElement, or nullptr when the
+/// paragraph has no such child.
+TextElement* ParagraphElement::getFirstTextChild() const
+{
+    for( const std::unique_ptr<Element>& rxElem : Children )
+    {
+        if( TextElement* pText = rxElem->dynCastAsTextElement() )
+            return pText;
+    }
+    return nullptr;
+}
+
+/// Out-of-line destructor; nothing beyond member destruction is required.
+PageElement::~PageElement() = default;
+
+/// Dispatches to the visitor's PageElement overload, passing on the iterator
+/// received from the caller.
+void PageElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt )
+{
+ rVisitor.visit(*this, rParentIt);
+}
+
+/** Tries to attach the hyperlink at @p link_it (an entry of
+ *  Hyperlinks.Children) to the elements of @p rElements that lie completely
+ *  inside the link's rectangle.
+ *
+ *  Text elements are moved into the link (several are allowed), paragraphs
+ *  are searched recursively, and a frame is moved into the link as its only
+ *  child. On the first match the link itself is spliced out of
+ *  Hyperlinks.Children into @p rElements in front of the matched element.
+ *
+ *  @return true when the link has at least one child afterwards; false also
+ *          when @p link_it does not designate a HyperlinkElement.
+ */
+bool PageElement::resolveHyperlink( const std::list<std::unique_ptr<Element>>::iterator& link_it, std::list<std::unique_ptr<Element>>& rElements )
+{
+ HyperlinkElement* pLink = dynamic_cast<HyperlinkElement*>(link_it->get());
+ if( ! pLink ) // sanity check
+ return false;
+
+ for( auto it = rElements.begin(); it != rElements.end(); ++it )
+ {
+ // only elements fully contained in the link rectangle are candidates
+ if( (*it)->x >= pLink->x && (*it)->x + (*it)->w <= pLink->x + pLink->w &&
+ (*it)->y >= pLink->y && (*it)->y + (*it)->h <= pLink->y + pLink->h )
+ {
+ TextElement* pText = (*it)->dynCastAsTextElement();
+ if( pText )
+ {
+ if( pLink->Children.empty() )
+ {
+ // first match: insert the hyperlink before the text element
+ rElements.splice( it, Hyperlinks.Children, link_it );
+ pLink->Parent = (*it)->Parent;
+ }
+ // move text element into hyperlink
+ // setParent() removes *it from rElements, so remember the
+ // successor and step back one position; the loop's ++it then
+ // continues with that successor
+ auto next = it;
+ ++next;
+ Element::setParent( it, pLink );
+ it = next;
+ --it;
+ continue;
+ }
+ // a link can contain multiple text elements or a single frame
+ if( ! pLink->Children.empty() )
+ continue;
+ if( dynamic_cast<ParagraphElement*>(it->get()) )
+ {
+ // look for matching elements inside the paragraph
+ if( resolveHyperlink( link_it, (*it)->Children ) )
+ break;
+ continue;
+ }
+ FrameElement* pFrame = dynamic_cast<FrameElement*>(it->get());
+ if( pFrame )
+ {
+ // insert the hyperlink before the frame
+ rElements.splice( it, Hyperlinks.Children, link_it );
+ pLink->Parent = (*it)->Parent;
+ // move frame into hyperlink
+ Element::setParent( it, pLink );
+ break;
+ }
+ }
+ }
+ return ! pLink->Children.empty();
+}
+
+/** Processes every pending entry of Hyperlinks.Children.
+ *
+ *  The front entry is handed to resolveHyperlink() together with the page's
+ *  children; an entry that could not be resolved is dropped so that the
+ *  loop always makes progress.
+ */
+void PageElement::resolveHyperlinks()
+{
+    auto& rPending = Hyperlinks.Children;
+    while( !rPending.empty() )
+    {
+        const bool bResolved = resolveHyperlink( rPending.begin(), Children );
+        if( bResolved )
+            continue;
+        rPending.pop_front();
+    }
+}
+
+/// Resolves font styles that are expressed as separate drawing elements;
+/// at present this only delegates to resolveUnderlines().
+void PageElement::resolveFontStyles( PDFIProcessor const & rProc )
+{
+ resolveUnderlines(rProc);
+}
+
+/** Turns horizontal stroke lines beneath text into underline attributes.
+ *
+ *  Every childless, stroke-only PolyPolyElement that consists of one
+ *  two-point polygon with equal y coordinates is compared against the page's
+ *  direct text and hyperlink children. A text element that is (almost)
+ *  completely underlined and neither rotated nor skewed gets a font id whose
+ *  attributes have isUnderline set. A line that matched such a text element
+ *  or a hyperlink is erased from Children.
+ */
+void PageElement::resolveUnderlines( PDFIProcessor const & rProc )
+{
+    // FIXME: currently the algorithm used is quadratic
+    // this could be solved by some sorting beforehand
+
+    // direct children an underline may belong to
+    std::vector<Element*> aCandidates;
+    aCandidates.reserve(Children.size());
+    for (auto const & rxChild : Children)
+    {
+        Element* pChild = rxChild.get();
+        if (pChild->dynCastAsTextElement() || dynamic_cast<HyperlinkElement*>(pChild))
+            aCandidates.push_back(pChild);
+    }
+
+    for( auto poly_it = Children.begin(); poly_it != Children.end(); )
+    {
+        PolyPolyElement* pLine = dynamic_cast< PolyPolyElement* >(poly_it->get());
+
+        /* check for: no filling
+         *            only two points (FIXME: handle small rectangles, too)
+         *            y coordinates of points are equal
+         */
+        const bool bSinglePolyStroke = pLine
+            && pLine->Children.empty()
+            && pLine->Action == PATH_STROKE
+            && pLine->PolyPoly.count() == 1;
+        if( !bSinglePolyStroke )
+        {
+            ++poly_it;
+            continue;
+        }
+
+        const basegfx::B2DPolygon aSegment = pLine->PolyPoly.getB2DPolygon(0);
+        if( aSegment.count() != 2 ||
+            aSegment.getB2DPoint(0).getY() != aSegment.getB2DPoint(1).getY() )
+        {
+            ++poly_it;
+            continue;
+        }
+
+        // order the end points from left to right
+        double fLeft = aSegment.getB2DPoint(0).getX();
+        double fRight = aSegment.getB2DPoint(1).getX();
+        if( fRight < fLeft )
+        {
+            const double fTmp = fRight;
+            fRight = fLeft;
+            fLeft = fTmp;
+        }
+        const double fLineY = aSegment.getB2DPoint(0).getY();
+
+        bool bLineConsumed = false;
+        for( Element* pEle : aCandidates )
+        {
+            // the line has to lie within the element's vertical extent
+            // (height enlarged by 10%)
+            if( !(pEle->y <= fLineY && pEle->y + pEle->h*1.1 >= fLineY) )
+                continue;
+
+            // first: is the element underlined completely ?
+            if( pEle->x + pEle->w*0.1 >= fLeft &&
+                pEle->x + pEle->w*0.9 <= fRight )
+            {
+                TextElement* pText = pEle->dynCastAsTextElement();
+                if( !pText )
+                {
+                    // must be HyperlinkElement
+                    bLineConsumed = true;
+                    continue;
+                }
+
+                const GraphicsContext& rTextGC = rProc.getGraphicsContext( pText->GCId );
+                if( rTextGC.isRotatedOrSkewed() )
+                    continue;
+
+                bLineConsumed = true;
+                // retrieve ID for modified font
+                FontAttributes aAttr = rProc.getFont( pText->FontId );
+                aAttr.isUnderline = true;
+                pText->FontId = rProc.getFontId( aAttr );
+            }
+            // second: hyperlinks may be larger than their underline
+            // since they are just arbitrary rectangles in the action definition
+            else if( fLeft >= pEle->x && fRight <= pEle->x+pEle->w &&
+                     dynamic_cast< HyperlinkElement* >(pEle) != nullptr )
+            {
+                bLineConsumed = true;
+            }
+        }
+
+        if( bLineConsumed )
+            poly_it = Children.erase( poly_it );
+        else
+            ++poly_it;
+    }
+}
+
+/// Out-of-line destructor; nothing beyond member destruction is required.
+DocumentElement::~DocumentElement() = default;
+
+/// Dispatches to the visitor's DocumentElement overload, passing on the
+/// iterator received from the caller.
+void DocumentElement::visitedBy( ElementTreeVisitor& rVisitor,
+ const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt)
+{
+ rVisitor.visit(*this, rParentIt);
+}
+
+/** Tells whether the text of @p pTextElem contains complex-script content.
+ *
+ *  @param rBreakIterator  break iterator asked for the script type at every
+ *                         index of the text; it is dereferenced without a check
+ *  @param pTextElem       element whose Text is examined
+ *  @return true as soon as one position is reported as ScriptType::COMPLEX
+ */
+bool isComplex(const css::uno::Reference<css::i18n::XBreakIterator>& rBreakIterator, TextElement* const pTextElem) {
+    const OUString aText(pTextElem->Text.toString());
+    const sal_Int32 nLength = aText.getLength();
+    for (sal_Int32 nPos = 0; nPos < nLength; ++nPos)
+    {
+        if (rBreakIterator->getScriptType(aText, nPos) == css::i18n::ScriptType::COMPLEX)
+            return true;
+    }
+    return false;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/imagecontainer.cxx b/sdext/source/pdfimport/tree/imagecontainer.cxx
new file mode 100644
index 0000000000..a7154164d7
--- /dev/null
+++ b/sdext/source/pdfimport/tree/imagecontainer.cxx
@@ -0,0 +1,146 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <imagecontainer.hxx>
+#include <genericelements.hxx>
+#include <xmlemitter.hxx>
+
+#include <rtl/ustrbuf.hxx>
+#include <sal/log.hxx>
+#include <o3tl/safeint.hxx>
+#include <osl/diagnose.h>
+
+#include <com/sun/star/beans/PropertyValue.hpp>
+
+using namespace com::sun::star;
+
+namespace pdfi
+{
+
+namespace
+{
+
+// Base64 alphabet, indexed by the 6-bit value to encode (0..63). Written as
+// a string literal; the terminating NUL at index 64 is never addressed.
+const char aBase64EncodeTable[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
+
+/** Encodes a byte buffer as Base64 text.
+ *
+ *  @param i_pBuffer        bytes to encode
+ *  @param i_nBufferLength  number of bytes available at @p i_pBuffer
+ *  @return the encoding using aBase64EncodeTable, padded with '=' to a
+ *          multiple of four characters; empty for an empty buffer
+ */
+OUString encodeBase64( const sal_Int8* i_pBuffer, const sal_Int32 i_nBufferLength )
+{
+    // Exact result size: four characters per started group of three bytes.
+    // Computed in 64 bit and clamped, because a 32 bit "(len+1)*4" overflows
+    // for buffers of 512 MiB and more and is too small for short inputs.
+    sal_Int64 nCapacity = (static_cast<sal_Int64>(i_nBufferLength) + 2) / 3 * 4;
+    if( nCapacity < 0 )
+        nCapacity = 0;
+    else if( nCapacity > SAL_MAX_INT32 )
+        nCapacity = SAL_MAX_INT32;
+    OUStringBuffer aBuf( static_cast<sal_Int32>(nCapacity) );
+
+    const sal_Int32 nRemain(i_nBufferLength%3);
+    const sal_Int32 nFullTripleLength( i_nBufferLength - (i_nBufferLength%3));
+    sal_Int32 nBufPos( 0 );
+
+    // complete groups of three bytes become four characters each
+    for( sal_Int32 i = 0; i < nFullTripleLength; i += 3, nBufPos += 4 )
+    {
+        const sal_Int32 nBinary = (static_cast<sal_uInt8>(i_pBuffer[i + 0]) << 16) +
+                                  (static_cast<sal_uInt8>(i_pBuffer[i + 1]) << 8) +
+                                  static_cast<sal_uInt8>(i_pBuffer[i + 2]);
+
+        // reserve the four output positions, then overwrite them
+        aBuf.append("====");
+
+        sal_uInt8 nIndex (static_cast<sal_uInt8>((nBinary & 0xFC0000) >> 18));
+        aBuf[nBufPos] = aBase64EncodeTable [nIndex];
+
+        nIndex = static_cast<sal_uInt8>((nBinary & 0x3F000) >> 12);
+        aBuf[nBufPos+1] = aBase64EncodeTable [nIndex];
+
+        nIndex = static_cast<sal_uInt8>((nBinary & 0xFC0) >> 6);
+        aBuf[nBufPos+2] = aBase64EncodeTable [nIndex];
+
+        nIndex = static_cast<sal_uInt8>((nBinary & 0x3F));
+        aBuf[nBufPos+3] = aBase64EncodeTable [nIndex];
+    }
+
+    // one or two trailing bytes: positions that are not overwritten keep
+    // their '=' and act as padding
+    if( nRemain > 0 )
+    {
+        aBuf.append("====");
+        sal_Int32 nBinary( 0 );
+        const sal_Int32 nStart(i_nBufferLength-nRemain);
+        switch(nRemain)
+        {
+            case 1: nBinary = static_cast<sal_uInt8>(i_pBuffer[nStart + 0]) << 16;
+                break;
+            case 2: nBinary = (static_cast<sal_uInt8>(i_pBuffer[nStart + 0]) << 16) +
+                              (static_cast<sal_uInt8>(i_pBuffer[nStart + 1]) << 8);
+                break;
+        }
+        sal_uInt8 nIndex (static_cast<sal_uInt8>((nBinary & 0xFC0000) >> 18));
+        aBuf[nBufPos] = aBase64EncodeTable [nIndex];
+
+        nIndex = static_cast<sal_uInt8>((nBinary & 0x3F000) >> 12);
+        aBuf[nBufPos+1] = aBase64EncodeTable [nIndex];
+
+        if( nRemain == 2 )
+        {
+            nIndex = static_cast<sal_uInt8>((nBinary & 0xFC0) >> 6);
+            aBuf[nBufPos+2] = aBase64EncodeTable [nIndex];
+        }
+    }
+
+    return aBuf.makeStringAndClear();
+}
+
+} // namespace
+
+/// Creates an empty container; the members are default-initialized.
+ImageContainer::ImageContainer() = default;
+
+/// Appends a copy of @p xBitmap to the stored images and returns the id
+/// (its index in m_aImages) under which it can be retrieved.
+ImageId ImageContainer::addImage( const uno::Sequence<beans::PropertyValue>& xBitmap )
+{
+    // the index of the new entry equals the size before appending
+    const auto nNewIndex = m_aImages.size();
+    m_aImages.push_back( xBitmap );
+    return nNewIndex;
+}
+
+/** Writes the image stored under @p nId to the emitter as Base64 text.
+ *
+ *  The image's property sequence is searched for the "InputSequence"
+ *  property, whose value has to be a byte sequence. Nothing is written and a
+ *  warning is logged when the id is out of range, the property is missing or
+ *  its value has a different type.
+ *
+ *  @param nId       id previously returned by addImage()
+ *  @param rContext  emit context whose rEmitter receives the encoded data
+ */
+void ImageContainer::writeBase64EncodedStream( ImageId nId, EmitContext& rContext )
+{
+    OSL_ASSERT( nId >= 0 && o3tl::make_unsigned(nId) < m_aImages.size() );
+    // the assertion vanishes in non-debug builds; never index out of range
+    if( nId < 0 || o3tl::make_unsigned(nId) >= m_aImages.size() )
+    {
+        SAL_WARN("sdext.pdfimport", "Invalid image id " << nId);
+        return;
+    }
+
+    const uno::Sequence<beans::PropertyValue>& rEntry( m_aImages[nId] );
+
+    // find "InputSequence" property
+    const beans::PropertyValue* pAry(rEntry.getConstArray());
+    const sal_Int32 nLen(rEntry.getLength());
+    const beans::PropertyValue* pValue(
+        std::find_if(pAry, pAry+nLen,
+            [] (beans::PropertyValue const& v) -> bool {
+                return v.Name == "InputSequence";
+            }));
+
+    if (pValue == pAry + nLen )
+    {
+        SAL_WARN("sdext.pdfimport", "InputSequence not found");
+        return;
+    }
+
+    uno::Sequence<sal_Int8> aData;
+    if( !(pValue->Value >>= aData) )
+    {
+        SAL_WARN("sdext.pdfimport", "Wrong data type");
+        return;
+    }
+
+    rContext.rEmitter.write( encodeBase64( aData.getConstArray(), aData.getLength() ));
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.cxx b/sdext/source/pdfimport/tree/pdfiprocessor.cxx
new file mode 100644
index 0000000000..44485f4c25
--- /dev/null
+++ b/sdext/source/pdfimport/tree/pdfiprocessor.cxx
@@ -0,0 +1,717 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <pdfiprocessor.hxx>
+#include <xmlemitter.hxx>
+#include <pdfihelper.hxx>
+#include <imagecontainer.hxx>
+#include <genericelements.hxx>
+#include "style.hxx"
+#include <treevisiting.hxx>
+
+#include <sal/log.hxx>
+
+#include <comphelper/sequence.hxx>
+#include <basegfx/polygon/b2dpolygonclipper.hxx>
+#include <basegfx/polygon/b2dpolygontools.hxx>
+#include <basegfx/utils/canvastools.hxx>
+#include <basegfx/matrix/b2dhommatrix.hxx>
+#include <i18nutil/unicode.hxx>
+
+using namespace com::sun::star;
+
+
+namespace pdfi
+{
+
+ /** Sets up an empty document tree and registers the defaults under id 0:
+  *  a default graphics context (also pushed as the bottom of the state stack)
+  *  and a default font.
+  */
+ PDFIProcessor::PDFIProcessor( const uno::Reference< task::XStatusIndicator >& xStat ,
+     css::uno::Reference< css::uno::XComponentContext > const & xContext) :
+
+     m_xContext(xContext),
+     prevCharWidth(0),
+     m_pDocument( ElementFactory::createDocumentElement() ),
+     m_pCurPage(nullptr),
+     m_pCurElement(nullptr),
+     m_nNextFontId( 1 ),
+     m_nNextGCId( 1 ),
+     m_nPages(0),
+     m_nNextZOrder( 1 ),
+     m_xStatusIndicator( xStat )
+{
+    // default graphics context: bottom of the stack and id 0 in both lookup maps
+    GraphicsContext aInitialGC;
+    m_aGCStack.push_back( aInitialGC );
+    m_aGCToId.insert({aInitialGC, 0});
+    m_aIdToGC.insert({0, aInitialGC});
+
+    // default font: id 0 in both lookup maps
+    FontAttributes aFallbackFont;
+    aFallbackFont.familyName = "Helvetica";
+    aFallbackFont.fontWeight = u"normal"_ustr;
+    aFallbackFont.isItalic = false;
+    aFallbackFont.size = 10*PDFI_OUTDEV_RESOLUTION/72;
+    m_aIdToFont.insert({0, aFallbackFont});
+    m_aFontToId.insert({aFallbackFont, 0});
+}
+
+/// Records the total page count; it is used as the range of the status indicator.
+void PDFIProcessor::setPageNum( sal_Int32 nPages )
+{
+    this->m_nPages = nPages;
+}
+
+
+/// Saves the current graphics context by duplicating the top of the state stack.
+void PDFIProcessor::pushState()
+{
+    // take a copy before push_back(), so the argument does not refer into
+    // the container that is being modified
+    const auto aTopCopy = m_aGCStack.back();
+    m_aGCStack.push_back( aTopCopy );
+}
+
+/** Restores the graphics context saved by the matching pushState().
+ *
+ *  The bottom-most stack entry is the default context pushed by the
+ *  constructor. It is never removed: pushState() calls m_aGCStack.back(),
+ *  which is undefined on an empty container, so an unbalanced restore (e.g.
+ *  from a malformed input stream) is ignored with a warning instead of
+ *  emptying the stack.
+ */
+void PDFIProcessor::popState()
+{
+    if( m_aGCStack.size() <= 1 )
+    {
+        SAL_WARN("sdext.pdfimport", "PDFIProcessor::popState(): no saved graphics state to restore");
+        return;
+    }
+    m_aGCStack.pop_back();
+}
+
+/// Stores the flatness value in the current graphics context.
+void PDFIProcessor::setFlatness( double value )
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.Flatness = value;
+}
+
+/// Converts @p rMatrix into the transformation of the current graphics context.
+void PDFIProcessor::setTransformation( const geometry::AffineMatrix2D& rMatrix )
+{
+    GraphicsContext& rContext = getCurrentContext();
+    basegfx::unotools::homMatrixFromAffineMatrix( rContext.Transformation, rMatrix );
+}
+
+/** Copies the dash pattern into the current graphics context.
+ *
+ *  The dash start offset (second parameter) is currently ignored.
+ */
+void PDFIProcessor::setLineDash( const uno::Sequence<double>& dashes,
+                                 double /*start*/ )
+{
+    // TODO(F2): factor in start offset
+    comphelper::sequenceToContainer( getCurrentContext().DashArray, dashes );
+}
+
+/// Stores the line join style in the current graphics context.
+void PDFIProcessor::setLineJoin(sal_Int8 nJoin)
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.LineJoin = nJoin;
+}
+
+/// Stores the line cap style in the current graphics context.
+void PDFIProcessor::setLineCap(sal_Int8 nCap)
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.LineCap = nCap;
+}
+
+/// The miter limit is not stored; the call only logs a warning.
+void PDFIProcessor::setMiterLimit(double /*fMiterLimit*/)
+{
+    SAL_WARN("sdext.pdfimport", "PDFIProcessor::setMiterLimit(): not supported by ODF");
+}
+
+/// Stores the stroke width in the current graphics context.
+void PDFIProcessor::setLineWidth(double nWidth)
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.LineWidth = nWidth;
+}
+
+/// Stores the fill colour in the current graphics context.
+void PDFIProcessor::setFillColor( const rendering::ARGBColor& rColor )
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.FillColor = rColor;
+}
+
+/// Stores the stroke colour (kept as LineColor) in the current graphics context.
+void PDFIProcessor::setStrokeColor( const rendering::ARGBColor& rColor )
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.LineColor = rColor;
+}
+
+/** Makes @p i_rFont the font of the current graphics context.
+ *
+ *  The attributes are first adjusted for the context's text render mode, then
+ *  looked up in the font table; an unknown combination gets the next free id.
+ */
+void PDFIProcessor::setFont( const FontAttributes& i_rFont )
+{
+    GraphicsContext& rContext = getCurrentContext();
+    FontAttributes aEffectiveFont( i_rFont );
+
+    // for text render modes, please see PDF reference manual
+    switch( rContext.TextRenderMode )
+    {
+        case 1:
+            aEffectiveFont.isOutline = true;
+            break;
+        case 2:
+            // tdf#81484: faux bold is written as "stroke+fill" with identical
+            // stroke and fill colours; map that to bold, anything else to outline.
+            if( rContext.LineColor == rContext.FillColor )
+                aEffectiveFont.fontWeight = u"bold"_ustr;
+            else
+                aEffectiveFont.isOutline = true;
+            break;
+        default:
+            break;
+    }
+
+    const auto aKnown = m_aFontToId.find( aEffectiveFont );
+    if( aKnown != m_aFontToId.end() )
+    {
+        rContext.FontId = aKnown->second;
+        return;
+    }
+
+    // not seen before: register under a fresh id in both directions
+    const auto nNewId = m_nNextFontId++;
+    m_aFontToId[ aEffectiveFont ] = nNewId;
+    m_aIdToFont[ nNewId ] = aEffectiveFont;
+    rContext.FontId = nNewId;
+}
+
+/** Stores the text render mode and re-applies the current font, so that the
+ *  mode dependent font adjustments done by setFont() take effect.
+ */
+void PDFIProcessor::setTextRenderMode( sal_Int32 i_nMode )
+{
+    GraphicsContext& rContext = getCurrentContext();
+    rContext.TextRenderMode = i_nMode;
+
+    const auto aCurrentFont = m_aIdToFont.find( rContext.FontId );
+    if( aCurrentFont == m_aIdToFont.end() )
+        return;
+    setFont( aCurrentFont->second );
+}
+
+/** Returns the id that setFont() assigns to @p rAttr.
+ *
+ *  Implemented by temporarily selecting the font and then restoring the
+ *  previous font id of the current context. The font tables may gain a new
+ *  entry as a side effect, despite the method being const.
+ */
+sal_Int32 PDFIProcessor::getFontId( const FontAttributes& rAttr ) const
+{
+    PDFIProcessor* const pSelf = const_cast<PDFIProcessor*>(this);
+
+    const sal_Int32 nSavedId = getCurrentContext().FontId;
+    pSelf->setFont( rAttr );
+    const sal_Int32 nResult = getCurrentContext().FontId;
+    pSelf->getCurrentContext().FontId = nSavedId;
+
+    return nResult;
+}
+
+// line diagnose block - start
+/** Turns the glyphs collected in m_GlyphsList into one text frame.
+ *
+ *  A frame with a single paragraph is created, each glyph becomes a text
+ *  element, and a space is prepended to a glyph when the gap to its
+ *  predecessor exceeds a threshold. The glyph list is cleared afterwards.
+ *  Does nothing if no glyphs are pending.
+ */
+void PDFIProcessor::processGlyphLine()
+{
+    if (m_GlyphsList.empty())
+        return;
+
+    // Gap threshold: half the width of the first space glyph found ...
+    double fGapThreshold = 0.0;
+    for (CharGlyph & rCandidate : m_GlyphsList)
+    {
+        OUString& rChars = rCandidate.getGlyph();
+        const sal_Unicode cFirst = rChars.isEmpty() ? sal_Unicode(0) : rChars[0];
+        if ((cFirst != 0x20) && (cFirst != 0xa0))
+            continue;
+
+        fGapThreshold = rCandidate.getWidth() * 0.5;
+        break;
+    }
+
+    // ... or, lacking a usable space glyph, 20% of the average glyph width
+    if (fGapThreshold == 0.0)
+    {
+        double fWidthSum = 0.0;
+        for (const CharGlyph & rGlyph : m_GlyphsList)
+            fWidthSum += rGlyph.getWidth();
+        fGapThreshold = (fWidthSum / m_GlyphsList.size()) * 0.2;
+    }
+
+    // the first glyph determines parent element, graphics context and font size
+    CharGlyph& rFirst = m_GlyphsList[0];
+    FrameElement* pFrame = ElementFactory::createFrameElement(
+        rFirst.getCurElement(),
+        getGCId(rFirst.getGC()));
+    pFrame->ZOrder = m_nNextZOrder++;
+    pFrame->IsForText = true;
+    pFrame->FontSize = getFont(rFirst.getGC().FontId).size;
+    ParagraphElement* pPara = ElementFactory::createParagraphElement(pFrame);
+
+    const size_t nGlyphs = m_GlyphsList.size();
+    for (size_t nPos = 0; nPos < nGlyphs; ++nPos)
+    {
+        CharGlyph& rGlyph = m_GlyphsList[nPos];
+        TextElement* pText = ElementFactory::createTextElement(
+            pPara,
+            getGCId(rGlyph.getGC()),
+            rGlyph.getGC().FontId);
+
+        if (nPos == 0)
+        {
+            // position comes from the translation part of the glyph matrix
+            pText->x = rGlyph.getGC().Transformation.get(0, 2);
+            pText->y = rGlyph.getGC().Transformation.get(1, 2);
+            pText->w = 0;
+            pText->h = 0;
+            pPara->updateGeometryWith(pText);
+            pFrame->updateGeometryWith(pPara);
+        }
+        else if (rGlyph.getPrevSpaceWidth() > fGapThreshold)
+        {
+            pText->Text.append(" ");
+        }
+        pText->Text.append(rGlyph.getGlyph());
+    }
+
+    m_GlyphsList.clear();
+}
+
+/** Queues one glyph run for the current text line.
+ *
+ *  Builds the text matrix of the run from the font matrix, font size, the
+ *  rectangle corners and the current transformation, compares it with the
+ *  matrix of the previously queued run and, if the run does not continue the
+ *  pending line, flushes that line via processGlyphLine() first. The run is
+ *  then appended to m_GlyphsList together with its width and the gap to its
+ *  predecessor.
+ */
+void PDFIProcessor::drawGlyphs( const OUString& rGlyphs,
+ const geometry::RealRectangle2D& rRect,
+ const geometry::Matrix2D& rFontMatrix,
+ double fontSize)
+{
+ double ascent = getFont(getCurrentContext().FontId).ascent;
+
+ basegfx::B2DHomMatrix fontMatrix(
+ rFontMatrix.m00, rFontMatrix.m01, 0.0,
+ rFontMatrix.m10, rFontMatrix.m11, 0.0);
+ fontMatrix.scale(fontSize, fontSize);
+
+ // matrix 1 is anchored at (X1,Y1) of rRect, matrix 2 at (X2,Y2)
+ basegfx::B2DHomMatrix totalTextMatrix1(fontMatrix);
+ basegfx::B2DHomMatrix totalTextMatrix2(fontMatrix);
+ totalTextMatrix1.translate(rRect.X1, rRect.Y1);
+ totalTextMatrix2.translate(rRect.X2, rRect.Y2);
+
+ // correction: flip the y axis and shift by the font ascent
+ basegfx::B2DHomMatrix corrMatrix;
+ corrMatrix.scale(1.0, -1.0);
+ corrMatrix.translate(0.0, ascent);
+ totalTextMatrix1 = totalTextMatrix1 * corrMatrix;
+ totalTextMatrix2 = totalTextMatrix2 * corrMatrix;
+
+ totalTextMatrix1 *= getCurrentContext().Transformation;
+ totalTextMatrix2 *= getCurrentContext().Transformation;
+
+ // offsetMatrix1: this run relative to the previous run's matrix
+ // offsetMatrix2: end anchor relative to start anchor of this run
+ basegfx::B2DHomMatrix invMatrix(totalTextMatrix1);
+ basegfx::B2DHomMatrix invPrevMatrix(prevTextMatrix);
+ invMatrix.invert();
+ invPrevMatrix.invert();
+ basegfx::B2DHomMatrix offsetMatrix1(totalTextMatrix1);
+ basegfx::B2DHomMatrix offsetMatrix2(totalTextMatrix2);
+ offsetMatrix1 *= invPrevMatrix;
+ offsetMatrix2 *= invMatrix;
+
+ // x translations give the run width and the gap left after the previous run
+ double charWidth = offsetMatrix2.get(0, 2);
+ double prevSpaceWidth = offsetMatrix1.get(0, 2) - prevCharWidth;
+
+ // Flush the pending line when the 2x2 part of the matrix changed, the run
+ // starts before the previous one, the gap exceeds 1.3 times the previous
+ // width, or the y offset to the previous run is not (nearly) zero.
+ if ((totalTextMatrix1.get(0, 0) != prevTextMatrix.get(0, 0)) ||
+ (totalTextMatrix1.get(0, 1) != prevTextMatrix.get(0, 1)) ||
+ (totalTextMatrix1.get(1, 0) != prevTextMatrix.get(1, 0)) ||
+ (totalTextMatrix1.get(1, 1) != prevTextMatrix.get(1, 1)) ||
+ (offsetMatrix1.get(0, 2) < 0.0) ||
+ (prevSpaceWidth > prevCharWidth * 1.3) ||
+ (!basegfx::fTools::equalZero(offsetMatrix1.get(1, 2), 0.0001)))
+ {
+ processGlyphLine();
+ }
+
+ // the queued glyph carries a copy of the context with the text matrix as transformation
+ CharGlyph aGlyph(m_pCurElement, getCurrentContext(), charWidth, prevSpaceWidth, rGlyphs);
+ aGlyph.getGC().Transformation = totalTextMatrix1;
+ m_GlyphsList.push_back(aGlyph);
+
+ // remember this run for the comparison done on the next call
+ prevCharWidth = charWidth;
+ prevTextMatrix = totalTextMatrix1;
+}
+
+/// If the current element is a text element, steps back up to its parent.
+void PDFIProcessor::endText()
+{
+    TextElement* const pCurrentText = m_pCurElement->dynCastAsTextElement();
+    if( pCurrentText == nullptr )
+        return;
+    m_pCurElement = pCurrentText->Parent;
+}
+
+/** Creates a frame with an image element for image @p nImage.
+ *
+ *  Position and size of both elements are the translation and scale obtained
+ *  by decomposing the current transformation.
+ */
+void PDFIProcessor::setupImage(ImageId nImage)
+{
+    const GraphicsContext& rContext = getCurrentContext();
+
+    double fRotation, fShear;
+    basegfx::B2DTuple aSize, aOrigin;
+    rContext.Transformation.decompose(aSize, aOrigin, fRotation, fShear);
+
+    const sal_Int32 nContextId = getGCId(rContext);
+    FrameElement* pFrame = ElementFactory::createFrameElement( m_pCurElement, nContextId );
+    ImageElement* pImage = ElementFactory::createImageElement( pFrame, nContextId, nImage );
+
+    pImage->x = aOrigin.getX();
+    pFrame->x = pImage->x;
+    pImage->y = aOrigin.getY();
+    pFrame->y = pImage->y;
+    pImage->w = aSize.getX();
+    pFrame->w = pImage->w;
+    pImage->h = aSize.getY();
+    pFrame->h = pImage->h;
+    pFrame->ZOrder = m_nNextZOrder++;
+
+    // The poppler wrapper already flips the matrix vertically because the
+    // vertical axes of PDF and ODF point in opposite directions (see poppler's
+    // GfxState::GfxState()). The image's own vertical axis does not depend on
+    // that, so the incoming matrix is flipped once too often relative to the
+    // image; force a vertical mirror to compensate.
+    pFrame->MirrorVertical = true;
+}
+
+/// Registers the bitmap and places it as a plain image; the invert flag is ignored.
+void PDFIProcessor::drawMask(const uno::Sequence<beans::PropertyValue>& xBitmap,
+                             bool /*bInvert*/ )
+{
+    // TODO(F3): Handle mask and inversion
+    const ImageId nNewImage = m_aImages.addImage(xBitmap);
+    setupImage( nNewImage );
+}
+
+/// Registers the bitmap in the image container and places it on the page.
+void PDFIProcessor::drawImage(const uno::Sequence<beans::PropertyValue>& xBitmap )
+{
+    const ImageId nNewImage = m_aImages.addImage(xBitmap);
+    setupImage( nNewImage );
+}
+
+/// Registers the bitmap and places it as a plain image; the mask colours are ignored.
+void PDFIProcessor::drawColorMaskedImage(const uno::Sequence<beans::PropertyValue>& xBitmap,
+                                         const uno::Sequence<uno::Any>& /*xMaskColors*/ )
+{
+    // TODO(F3): Handle mask colors
+    const ImageId nNewImage = m_aImages.addImage(xBitmap);
+    setupImage( nNewImage );
+}
+
+/// Registers the bitmap and places it as a plain image; mask and invert flag are ignored.
+void PDFIProcessor::drawMaskedImage(const uno::Sequence<beans::PropertyValue>& xBitmap,
+                                    const uno::Sequence<beans::PropertyValue>& /*xMask*/,
+                                    bool /*bInvertMask*/)
+{
+    // TODO(F3): Handle mask and inversion
+    const ImageId nNewImage = m_aImages.addImage(xBitmap);
+    setupImage( nNewImage );
+}
+
+/// Registers the bitmap and places it as a plain image; the alpha mask is ignored.
+void PDFIProcessor::drawAlphaMaskedImage(const uno::Sequence<beans::PropertyValue>& xBitmap,
+                                         const uno::Sequence<beans::PropertyValue>& /*xMask*/)
+{
+    // TODO(F3): Handle mask
+    const ImageId nNewImage = m_aImages.addImage(xBitmap);
+    setupImage( nNewImage );
+}
+
+/** Adds @p rPath to the current element as a stroked poly-polygon.
+ *
+ *  The path is transformed with the current transformation; the new element
+ *  receives the next z-order value.
+ */
+void PDFIProcessor::strokePath( const uno::Reference< rendering::XPolyPolygon2D >& rPath )
+{
+    basegfx::B2DPolyPolygon aOutline( basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath) );
+    aOutline.transform( getCurrentContext().Transformation );
+
+    const sal_Int32 nContextId = getGCId( getCurrentContext() );
+    PolyPolyElement* pElement =
+        ElementFactory::createPolyPolyElement( m_pCurElement, nContextId, aOutline, PATH_STROKE );
+    pElement->updateGeometry();
+    pElement->ZOrder = m_nNextZOrder++;
+}
+
+/** Adds @p rPath to the current element as a filled poly-polygon (PATH_FILL).
+ *
+ *  The path is transformed with the current transformation; the new element
+ *  receives the next z-order value.
+ */
+void PDFIProcessor::fillPath( const uno::Reference< rendering::XPolyPolygon2D >& rPath )
+{
+    basegfx::B2DPolyPolygon aArea( basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath) );
+    aArea.transform( getCurrentContext().Transformation );
+
+    const sal_Int32 nContextId = getGCId( getCurrentContext() );
+    PolyPolyElement* pElement =
+        ElementFactory::createPolyPolyElement( m_pCurElement, nContextId, aArea, PATH_FILL );
+    pElement->updateGeometry();
+    pElement->ZOrder = m_nNextZOrder++;
+}
+
+/** Adds @p rPath to the current element as a filled poly-polygon (PATH_EOFILL).
+ *
+ *  The path is transformed with the current transformation; the new element
+ *  receives the next z-order value.
+ */
+void PDFIProcessor::eoFillPath( const uno::Reference< rendering::XPolyPolygon2D >& rPath )
+{
+    basegfx::B2DPolyPolygon aArea( basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath) );
+    aArea.transform( getCurrentContext().Transformation );
+
+    const sal_Int32 nContextId = getGCId( getCurrentContext() );
+    PolyPolyElement* pElement =
+        ElementFactory::createPolyPolyElement( m_pCurElement, nContextId, aArea, PATH_EOFILL );
+    pElement->updateGeometry();
+    pElement->ZOrder = m_nNextZOrder++;
+}
+
+/** Narrows the clip region of the current context to @p rPath.
+ *
+ *  The transformed path is clipped against the existing clip if there is one;
+ *  otherwise it becomes the clip as it is.
+ */
+void PDFIProcessor::intersectClip(const uno::Reference< rendering::XPolyPolygon2D >& rPath)
+{
+    // TODO(F3): interpret fill mode
+    GraphicsContext& rContext = getCurrentContext();
+
+    basegfx::B2DPolyPolygon aClip( basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath) );
+    aClip.transform( rContext.Transformation );
+
+    basegfx::B2DPolyPolygon aPrevious( rContext.Clip );
+    if( aPrevious.count() != 0 )
+    {
+        // #i92985# adapted API from (..., false, false) to (..., true, false)
+        aClip = basegfx::utils::clipPolyPolygonOnPolyPolygon( aPrevious, aClip, true, false );
+    }
+
+    rContext.Clip = aClip;
+}
+
+/** Narrows the clip region of the current context to @p rPath.
+ *
+ *  Currently performs the same steps as intersectClip(): the transformed path
+ *  is clipped against the existing clip if there is one.
+ */
+void PDFIProcessor::intersectEoClip(const uno::Reference< rendering::XPolyPolygon2D >& rPath)
+{
+    // TODO(F3): interpret fill mode
+    GraphicsContext& rContext = getCurrentContext();
+
+    basegfx::B2DPolyPolygon aClip( basegfx::unotools::b2DPolyPolygonFromXPolyPolygon2D(rPath) );
+    aClip.transform( rContext.Transformation );
+
+    basegfx::B2DPolyPolygon aPrevious( rContext.Clip );
+    if( aPrevious.count() != 0 )
+    {
+        // #i92985# adapted API from (..., false, false) to (..., true, false)
+        aClip = basegfx::utils::clipPolyPolygonOnPolyPolygon( aPrevious, aClip, true, false );
+    }
+
+    rContext.Clip = aClip;
+}
+
+/** Adds a hyperlink covering @p rBounds to the current page.
+ *
+ *  Links with an empty URI are dropped.
+ */
+void PDFIProcessor::hyperLink( const geometry::RealRectangle2D& rBounds,
+                               const OUString& rURI )
+{
+    if( rURI.isEmpty() )
+        return;
+
+    HyperlinkElement* pHyperlink =
+        ElementFactory::createHyperlinkElement( &m_pCurPage->Hyperlinks, rURI );
+    pHyperlink->x = rBounds.X1;
+    pHyperlink->y = rBounds.Y1;
+    pHyperlink->w = rBounds.X2-rBounds.X1;
+    pHyperlink->h = rBounds.Y2-rBounds.Y1;
+}
+
+/// Returns the attributes registered for @p nFontId, or those of font 0 if the id is unknown.
+const FontAttributes& PDFIProcessor::getFont( sal_Int32 nFontId ) const
+{
+    auto aEntry = m_aIdToFont.find( nFontId );
+    const bool bKnown = ( aEntry != m_aIdToFont.end() );
+    if( !bKnown )
+        aEntry = m_aIdToFont.find( 0 );   // fall back to the default font
+    assert(aEntry != m_aIdToFont.end());
+    return aEntry->second;
+}
+
+/// Returns the id of @p rGC, registering the context under a fresh id if it is not known yet.
+sal_Int32 PDFIProcessor::getGCId( const GraphicsContext& rGC )
+{
+    const auto aKnown = m_aGCToId.find( rGC );
+    if( aKnown != m_aGCToId.end() )
+        return aKnown->second;
+
+    // first occurrence: record the context in both lookup directions
+    const sal_Int32 nNewId = m_nNextGCId;
+    m_aGCToId.insert({rGC, nNewId});
+    m_aIdToGC.insert({nNewId, rGC});
+    m_nNextGCId++;
+    return nNewId;
+}
+
+/// Returns the context registered for @p nGCId, or context 0 if the id is unknown.
+const GraphicsContext& PDFIProcessor::getGraphicsContext( sal_Int32 nGCId ) const
+{
+    auto aEntry = m_aIdToGC.find( nGCId );
+    const bool bKnown = ( aEntry != m_aIdToGC.end() );
+    if( !bKnown )
+        aEntry = m_aIdToGC.find( 0 );   // fall back to the default context
+    assert(aEntry != m_aIdToGC.end());
+    return aEntry->second;
+}
+
+/// Flushes the pending glyph line and ends the status indicator after the last page.
+void PDFIProcessor::endPage()
+{
+    processGlyphLine(); // draw last line
+
+    if( !m_xStatusIndicator.is() || !m_pCurPage )
+        return;
+    if( m_pCurPage->PageNumber == m_nPages )
+        m_xStatusIndicator->end();
+}
+
+/** Starts a new page of size @p rSize.
+ *
+ *  Resets the clip to the page rectangle, updates the status indicator,
+ *  creates the page element, makes it the current element and restarts the
+ *  z-order counter.
+ */
+void PDFIProcessor::startPage( const geometry::RealSize2D& rSize )
+{
+    // initial clip is to page bounds
+    const basegfx::B2DRange aPageArea( 0, 0, rSize.Width, rSize.Height );
+    getCurrentContext().Clip = basegfx::B2DPolyPolygon(
+        basegfx::utils::createPolygonFromRect( aPageArea ));
+
+    sal_Int32 nPageNo = 1;
+    if( m_pCurPage )
+        nPageNo = m_pCurPage->PageNumber+1;
+
+    if( m_xStatusIndicator.is() )
+    {
+        if( nPageNo == 1 )
+            startIndicator( " " );
+        m_xStatusIndicator->setValue( nPageNo );
+    }
+
+    m_pCurPage = ElementFactory::createPageElement(m_pDocument.get(), nPageNo);
+    m_pCurElement = m_pCurPage;
+    m_pCurPage->w = rSize.Width;
+    m_pCurPage->h = rSize.Height;
+    m_nNextZOrder = 1;
+}
+
+/** Writes the collected document tree to @p rEmitter.
+ *
+ *  Three visitors created by @p rVisitorFactory walk the tree in turn: an
+ *  optimizing visitor, a style collecting visitor and an emitting visitor.
+ *  The output is wrapped in an office:document element that declares the
+ *  namespaces listed below; the collected styles are written before the
+ *  document content.
+ */
+void PDFIProcessor::emit( XmlEmitter& rEmitter,
+ const TreeVisitorFactory& rVisitorFactory )
+{
+ // debug builds only: emitStructure() on the tree before optimization
+#if OSL_DEBUG_LEVEL > 0
+ m_pDocument->emitStructure( 0 );
+#endif
+
+ // pass 1: optimize the tree
+ ElementTreeVisitorSharedPtr optimizingVisitor(
+ rVisitorFactory.createOptimizingVisitor(*this));
+ // FIXME: localization
+ startIndicator( " " );
+ m_pDocument->visitedBy( *optimizingVisitor, std::list<std::unique_ptr<Element>>::const_iterator());
+
+ // debug builds only: emitStructure() again, after optimization
+#if OSL_DEBUG_LEVEL > 0
+ m_pDocument->emitStructure( 0 );
+#endif
+
+ // pass 2: collect the styles into aStyles
+ // get styles
+ StyleContainer aStyles;
+ ElementTreeVisitorSharedPtr finalizingVisitor(
+ rVisitorFactory.createStyleCollectingVisitor(aStyles,*this));
+ // FIXME: localization
+
+ m_pDocument->visitedBy( *finalizingVisitor, std::list<std::unique_ptr<Element>>::const_iterator() );
+
+ // pass 3 (run further below): the emitting visitor writes through aContext
+ EmitContext aContext( rEmitter, aStyles, m_aImages, *this, m_xStatusIndicator, m_xContext );
+ ElementTreeVisitorSharedPtr aEmittingVisitor(
+ rVisitorFactory.createEmittingVisitor(aContext));
+
+ PropertyMap aProps;
+ // document prolog
+ // NOTE: OASIS_STR is not #undef'ed, so it stays defined for the rest of the file
+ #define OASIS_STR "urn:oasis:names:tc:opendocument:xmlns:"
+ aProps[ "xmlns:office" ] = OASIS_STR "office:1.0" ;
+ aProps[ "xmlns:style" ] = OASIS_STR "style:1.0" ;
+ aProps[ "xmlns:text" ] = OASIS_STR "text:1.0" ;
+ aProps[ "xmlns:svg" ] = OASIS_STR "svg-compatible:1.0" ;
+ aProps[ "xmlns:table" ] = OASIS_STR "table:1.0" ;
+ aProps[ "xmlns:draw" ] = OASIS_STR "drawing:1.0" ;
+ aProps[ "xmlns:fo" ] = OASIS_STR "xsl-fo-compatible:1.0" ;
+ aProps[ "xmlns:xlink"] = "http://www.w3.org/1999/xlink";
+ aProps[ "xmlns:dc"] = "http://purl.org/dc/elements/1.1/";
+ aProps[ "xmlns:number"] = OASIS_STR "datastyle:1.0" ;
+ aProps[ "xmlns:presentation"] = OASIS_STR "presentation:1.0" ;
+ aProps[ "xmlns:math"] = "http://www.w3.org/1998/Math/MathML";
+ aProps[ "xmlns:form"] = OASIS_STR "form:1.0" ;
+ aProps[ "xmlns:script"] = OASIS_STR "script:1.0" ;
+ aProps[ "xmlns:dom"] = "http://www.w3.org/2001/xml-events";
+ aProps[ "xmlns:xforms"] = "http://www.w3.org/2002/xforms";
+ aProps[ "xmlns:xsd"] = "http://www.w3.org/2001/XMLSchema";
+ aProps[ "xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance";
+ aProps[ "office:version" ] = "1.0";
+
+ aContext.rEmitter.beginTag( "office:document", aProps );
+
+ // emit style list
+ aStyles.emit( aContext, *aEmittingVisitor );
+
+ // document content, then close the root element and the progress display
+ m_pDocument->visitedBy( *aEmittingVisitor, std::list<std::unique_ptr<Element>>::const_iterator() );
+ aContext.rEmitter.endTag( "office:document" );
+ endIndicator();
+}
+
+/** Starts the status indicator with @p rText as label and the page count as range.
+ *
+ *  Every "%d" in @p rText is replaced by the page count. Does nothing when no
+ *  status indicator is available.
+ */
+void PDFIProcessor::startIndicator( const OUString& rText )
+{
+    if( !m_xStatusIndicator.is() )
+        return;
+
+    const sal_Int32 nTotal = m_nPages;
+    const sal_Int32 nChars = rText.getLength();
+    OUStringBuffer aLabel( nChars*2 );
+
+    sal_Int32 nPos = 0;
+    while( nPos < nChars )
+    {
+        const bool bPlaceholder =
+            nPos + 1 < nChars && rText[nPos] == '%' && rText[nPos+1] == 'd';
+        if( bPlaceholder )
+        {
+            aLabel.append( nTotal );
+            nPos += 2;   // skip both characters of "%d"
+        }
+        else
+        {
+            aLabel.append( rText[nPos] );
+            ++nPos;
+        }
+    }
+    m_xStatusIndicator->start( aLabel.makeStringAndClear(), nTotal );
+}
+
+/// Ends the status indicator, if there is one.
+void PDFIProcessor::endIndicator()
+{
+    if( !m_xStatusIndicator.is() )
+        return;
+    m_xStatusIndicator->end();
+}
+
+/** Comparison used to sort sibling elements top-to-bottom, then left-to-right.
+ *
+ *  Elements whose vertical extents do not overlap are ordered by vertical
+ *  position. Elements sharing a "line" are ordered by horizontal extent, and
+ *  elements overlapping in both directions by x, then y. Negative widths and
+ *  heights are tolerated.
+ *
+ *  @return true if @p pLeft is to be placed before @p pRight
+ */
+static bool lr_tb_sort( std::unique_ptr<Element> const & pLeft, std::unique_ptr<Element> const & pRight )
+{
+    // Ensure irreflexivity (which could be compromised if h or w is negative):
+    if (pLeft == pRight)
+        return false;
+
+    // first: top-bottom sorting
+
+    // Note: allow for 10% overlap on text lines since text lines are usually
+    //       of the same order as font height whereas the real paint area
+    //       of text is usually smaller
+    double fudge_factor_left = 0.0, fudge_factor_right = 0.0;
+    if( pLeft->dynCastAsTextElement() )
+        fudge_factor_left = 0.1;
+    if( pRight->dynCastAsTextElement() )
+        fudge_factor_right = 0.1;
+
+    // Allow negative height
+    double lower_boundary_left  = pLeft->y  + std::max(pLeft->h, 0.0)  - fabs(pLeft->h)  * fudge_factor_left;
+    double lower_boundary_right = pRight->y + std::max(pRight->h, 0.0) - fabs(pRight->h) * fudge_factor_right;
+    double upper_boundary_left  = pLeft->y  + std::min(pLeft->h, 0.0);
+    double upper_boundary_right = pRight->y + std::min(pRight->h, 0.0);
+    // if left's lower boundary is above right's upper boundary
+    // then left is smaller
+    if( lower_boundary_left < upper_boundary_right )
+        return true;
+    // if right's lower boundary is above left's upper boundary
+    // then left is definitely not smaller
+    if( lower_boundary_right < upper_boundary_left )
+        return false;
+
+    // Allow negative width.
+    // The horizontal extent must be derived from x and w; basing it on y
+    // would compare vertical positions again and make the left-right
+    // ordering within a line depend on the wrong axis.
+    double left_boundary_left   = pLeft->x  + std::min(pLeft->w, 0.0);
+    double left_boundary_right  = pRight->x + std::min(pRight->w, 0.0);
+    double right_boundary_left  = pLeft->x  + std::max(pLeft->w, 0.0);
+    double right_boundary_right = pRight->x + std::max(pRight->w, 0.0);
+    // by now we have established that left and right are inside
+    // a "line", that is they have vertical overlap
+    // second: left-right sorting
+    // if left's right boundary is left to right's left boundary
+    // then left is smaller
+    if( right_boundary_left < left_boundary_right )
+        return true;
+    // if right's right boundary is left to left's left boundary
+    // then left is definitely not smaller
+    if( right_boundary_right < left_boundary_left )
+        return false;
+
+    // here we have established vertical and horizontal overlap
+    // so sort left first, top second
+    if( pLeft->x < pRight->x )
+        return true;
+    if( pRight->x < pLeft->x )
+        return false;
+    if( pLeft->y < pRight->y )
+        return true;
+
+    return false;
+}
+
+/// Sorts the direct children of @p pEle with lr_tb_sort.
+void PDFIProcessor::sortElements(Element* pEle)
+{
+    auto& rChildren = pEle->Children;
+    if( !rChildren.empty() )
+    {
+        // sort method from std::list is equivalent to stable_sort
+        // See S Meyers, Effective STL
+        rChildren.sort(lr_tb_sort);
+    }
+}
+
+/* Returns a copy of rString in which each code point is replaced by the result
+   of unicode::GetMirroredChar() for it. The string is processed in forward
+   order, one code point at a time.
+*/
+OUString PDFIProcessor::SubstituteBidiMirrored(const OUString& rString)
+{
+    const sal_Int32 nLength = rString.getLength();
+    OUStringBuffer aResult(nLength);
+
+    sal_Int32 nIndex = 0;
+    while (nIndex < nLength)
+    {
+        // iterateCodePoints() advances nIndex past the code point it returns
+        const sal_uInt32 nOriginal = rString.iterateCodePoints(&nIndex);
+        aResult.appendUtf32(unicode::GetMirroredChar(nOriginal));
+    }
+    return aResult.makeStringAndClear();
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/style.cxx b/sdext/source/pdfimport/tree/style.cxx
new file mode 100644
index 0000000000..3f1d20eb42
--- /dev/null
+++ b/sdext/source/pdfimport/tree/style.cxx
@@ -0,0 +1,246 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include "style.hxx"
+#include <genericelements.hxx>
+#include <xmlemitter.hxx>
+#include <rtl/ustrbuf.hxx>
+
+#include <algorithm>
+#include <string_view>
+
+using namespace pdfi;
+
+
+/// Creates an empty container; the first style will receive id 1.
+StyleContainer::StyleContainer()
+    : m_nNextId( 1 )
+{}
+
+/** Returns the id of @p rStyle, registering the style if it is not known yet.
+ *
+ *  Sub styles are resolved recursively first. For a known style the reference
+ *  count is increased; if it is requested as a top-level style
+ *  (@p bSubStyle false), it is marked as no longer being a pure sub style.
+ */
+sal_Int32 StyleContainer::impl_getStyleId( const Style& rStyle, bool bSubStyle )
+{
+    // build the lookup key; sub styles are referenced through their ids
+    HashedStyle aKey;
+    aKey.Name = rStyle.Name;
+    aKey.Properties = rStyle.Properties;
+    aKey.Contents = rStyle.Contents;
+    aKey.ContainedElement = rStyle.ContainedElement;
+    for( Style* pChild : rStyle.SubStyles )
+    {
+        const sal_Int32 nChildId = impl_getStyleId( *pChild, true );
+        aKey.SubStyles.push_back( nChildId );
+    }
+
+    const auto aKnown = m_aStyleToId.find( aKey );
+    if( aKnown == m_aStyleToId.end() )
+    {
+        // unknown style: store it under a fresh id
+        const sal_Int32 nNewId = m_nNextId++;
+        RefCountedHashedStyle& rEntry = m_aIdToStyle[ nNewId ];
+        rEntry.style = aKey;
+        rEntry.RefCount = 1;
+        rEntry.style.IsSubStyle = bSubStyle;
+        // fill the style hash to find the id
+        m_aStyleToId[ rEntry.style ] = nNewId;
+        return nNewId;
+    }
+
+    // known style: one more user
+    const sal_Int32 nKnownId = aKnown->second;
+    RefCountedHashedStyle& rEntry = m_aIdToStyle[ nKnownId ];
+    rEntry.RefCount++;
+    if( !bSubStyle )
+        rEntry.style.IsSubStyle = false;
+    return nKnownId;
+}
+
+/// Returns the id of the style:style named "standard" for the style family @p rName.
+sal_Int32 StyleContainer::getStandardStyleId( std::string_view rName )
+{
+    PropertyMap aAttributes;
+    aAttributes[ "style:family" ] = OStringToOUString( rName, RTL_TEXTENCODING_UTF8 );
+    aAttributes[ "style:name" ] = "standard";
+
+    Style aStandard( "style:style"_ostr, std::move(aAttributes) );
+    const sal_Int32 nId = getStyleId( aStandard );
+    return nId;
+}
+
+/// Returns the properties of style @p nStyleId, or nullptr if the id is unknown.
+const PropertyMap* StyleContainer::getProperties( sal_Int32 nStyleId ) const
+{
+    const auto aEntry = m_aIdToStyle.find( nStyleId );
+    if( aEntry == m_aIdToStyle.end() )
+        return nullptr;
+    return &(aEntry->second.style.Properties);
+}
+
+/** Replaces the properties of style @p nStyleId.
+ *
+ *  A style with a single reference is changed in place and keeps its id. A
+ *  shared style is left untouched: its reference count is decreased and the
+ *  id of a style with the new properties (already existing or newly created)
+ *  is returned instead.
+ *
+ *  @return the id to use from now on, or -1 if @p nStyleId is unknown
+ */
+sal_Int32 StyleContainer::setProperties( sal_Int32 nStyleId, PropertyMap&& rNewProps )
+{
+    const auto aEntry = m_aIdToStyle.find( nStyleId );
+    if( aEntry == m_aIdToStyle.end() )
+        return -1;
+
+    RefCountedHashedStyle& rOld = aEntry->second;
+
+    if( rOld.RefCount == 1 )
+    {
+        // sole user: modify in place, but re-key the style to id mapping
+        const sal_Int32 nSameId = aEntry->first;
+        m_aStyleToId.erase( rOld.style );
+        rOld.style.Properties = std::move(rNewProps);
+        m_aStyleToId[ rOld.style ] = nSameId;
+        return nSameId;
+    }
+
+    // shared: release our reference and look for a style with the new properties
+    rOld.RefCount--;
+
+    HashedStyle aWanted;
+    aWanted.Name = rOld.style.Name;
+    aWanted.Properties = std::move(rNewProps);
+    aWanted.Contents = rOld.style.Contents;
+    aWanted.ContainedElement = rOld.style.ContainedElement;
+    aWanted.SubStyles = rOld.style.SubStyles;
+    aWanted.IsSubStyle = rOld.style.IsSubStyle;
+
+    const auto aExisting = m_aStyleToId.find( aWanted );
+    if( aExisting != m_aStyleToId.end() )
+    {
+        const sal_Int32 nExistingId = aExisting->second;
+        m_aIdToStyle[ nExistingId ].RefCount++;
+        return nExistingId;
+    }
+
+    // create new style with new id
+    const sal_Int32 nNewId = m_nNextId++;
+    RefCountedHashedStyle& rCreated = m_aIdToStyle[ nNewId ];
+    rCreated.style = aWanted;
+    rCreated.RefCount = 1;
+    // fill style to id hash
+    m_aStyleToId[ aWanted ] = nNewId;
+    return nNewId;
+}
+
+/** Returns the name under which style @p nStyle is emitted.
+ *
+ *  An explicit "style:name" property wins. Otherwise the name is built from
+ *  the part after the last ':' of the "style:family" property (or of the
+ *  element name if there is no family), followed by the style id. Unknown
+ *  ids yield an "invalid style id" text.
+ */
+OUString StyleContainer::getStyleName( sal_Int32 nStyle ) const
+{
+    OUStringBuffer aName( 64 );
+
+    const auto aEntry = m_aIdToStyle.find( nStyle );
+    if( aEntry == m_aIdToStyle.end() )
+    {
+        aName.append( "invalid style id " + OUString::number(nStyle) );
+        return aName.makeStringAndClear();
+    }
+
+    const HashedStyle& rStyle = aEntry->second.style;
+
+    const auto aExplicit = rStyle.Properties.find( "style:name" );
+    if( aExplicit != rStyle.Properties.end() )
+    {
+        aName.append( aExplicit->second );
+        return aName.makeStringAndClear();
+    }
+
+    // no explicit name: derive one from family (or element name) and id
+    OUString aBase;
+    const auto aFamily = rStyle.Properties.find( "style:family" );
+    if( aFamily != rStyle.Properties.end() )
+        aBase = aFamily->second;
+    else
+        aBase = OStringToOUString( rStyle.Name, RTL_TEXTENCODING_ASCII_US );
+
+    const sal_Int32 nColon = aBase.lastIndexOf( ':' );
+    aName.append( aBase.subView(nColon+1) + OUString::number( nStyle ) );
+    return aName.makeStringAndClear();
+}
+
+/** Writes style @p nStyleId as an XML element.
+ *
+ *  Inside the element its sub styles are written first (recursively), then its
+ *  text contents, then its contained element via @p rContainedElemVisitor.
+ *  Unknown ids are ignored.
+ */
+void StyleContainer::impl_emitStyle( sal_Int32 nStyleId,
+                                     EmitContext& rContext,
+                                     ElementTreeVisitor& rContainedElemVisitor )
+{
+    const auto aEntry = m_aIdToStyle.find( nStyleId );
+    if( aEntry == m_aIdToStyle.end() )
+        return;
+
+    const HashedStyle& rStyle = aEntry->second.style;
+    const char* const pTag = rStyle.Name.getStr();
+
+    PropertyMap aAttributes( rStyle.Properties );
+    if( !rStyle.IsSubStyle )
+        aAttributes[ "style:name" ] = getStyleName( nStyleId );
+    if (rStyle.Name == "draw:stroke-dash")
+    {
+        // the dash style's draw:name mirrors its style:name
+        const OUString aStyleName = aAttributes[ "style:name" ];
+        aAttributes[ "draw:name" ] = aStyleName;
+    }
+
+    rContext.rEmitter.beginTag( pTag, aAttributes );
+
+    for( const sal_Int32 nChildId : rStyle.SubStyles )
+        impl_emitStyle( nChildId, rContext, rContainedElemVisitor );
+
+    if( !rStyle.Contents.isEmpty() )
+        rContext.rEmitter.write( rStyle.Contents );
+
+    if( rStyle.ContainedElement )
+    {
+        rStyle.ContainedElement->visitedBy( rContainedElemVisitor,
+                                            std::list<std::unique_ptr<Element>>::iterator() );
+    }
+
+    rContext.rEmitter.endTag( pTag );
+}
+
+/** Writes all top-level styles, grouped into three sections.
+ *
+ *  office:styles receives the styles named "standard",
+ *  office:automatic-styles all other styles except master pages, and
+ *  office:master-styles the style:master-page styles. Each section is sorted
+ *  with StyleIdNameSort. Pure sub styles are not listed here; they are written
+ *  as part of their parent style.
+ */
+void StyleContainer::emit( EmitContext& rContext,
+                           ElementTreeVisitor& rContainedElemVisitor )
+{
+    std::vector< sal_Int32 > aMasterPages, aAutomatic, aOffice;
+    for( const auto& rEntry : m_aIdToStyle )
+    {
+        if( rEntry.second.style.IsSubStyle )
+            continue;
+
+        if( rEntry.second.style.Name == "style:master-page" )
+            aMasterPages.push_back( rEntry.first );
+        else if( getStyleName( rEntry.first ) == "standard" )
+            aOffice.push_back( rEntry.first );
+        else
+            aAutomatic.push_back( rEntry.first );
+    }
+
+    for( std::vector< sal_Int32 >* pSection : { &aMasterPages, &aAutomatic, &aOffice } )
+        std::stable_sort( pSection->begin(), pSection->end(), StyleIdNameSort(&m_aIdToStyle) );
+
+    // writes one section element with the given styles inside
+    const auto emitSection =
+        [&]( const char* pTag, const std::vector< sal_Int32 >& rIds )
+        {
+            rContext.rEmitter.beginTag( pTag, PropertyMap() );
+            for( const sal_Int32 nId : rIds )
+                impl_emitStyle( nId, rContext, rContainedElemVisitor );
+            rContext.rEmitter.endTag( pTag );
+        };
+
+    emitSection( "office:styles", aOffice );
+    emitSection( "office:automatic-styles", aAutomatic );
+    emitSection( "office:master-styles", aMasterPages );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/style.hxx b/sdext/source/pdfimport/tree/style.hxx
new file mode 100644
index 0000000000..572be241e9
--- /dev/null
+++ b/sdext/source/pdfimport/tree/style.hxx
@@ -0,0 +1,166 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_STYLE_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_STYLE_HXX
+
+#include <pdfihelper.hxx>
+#include <numeric>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+#include <rtl/ustring.hxx>
+#include <rtl/string.hxx>
+#include <treevisiting.hxx>
+
+namespace pdfi
+{
+ struct Element;
+ struct EmitContext;
+
+ /** Keeps the styles used by the element tree and hands out numeric ids for them.
+  *
+  *  Callers describe a style as a Style tree and receive a sal_Int32 id from
+  *  getStyleId(). Internally each style is stored as a HashedStyle; one map
+  *  goes from id to style and a second one from style (compared by value)
+  *  back to its id. emit() writes the collected styles to an EmitContext.
+  */
+ class StyleContainer
+ {
+ public:
+     /// Description of one style element as passed in by callers.
+     struct Style
+     {
+         OString                 Name;
+         PropertyMap             Properties;
+         OUString                Contents;
+         Element*                ContainedElement; ///< raw pointer, not deleted by this struct
+         std::vector< Style* >   SubStyles;        ///< nested styles, referenced by pointer
+
+         Style( const OString& rName, PropertyMap&& rProps ) :
+             Name( rName ),
+             Properties( std::move(rProps) ),
+             ContainedElement( nullptr )
+         {}
+     };
+
+ private:
+     /// Internal, hashable form of a style; sub styles are referenced by id.
+     struct HashedStyle
+     {
+         OString                 Name;
+         PropertyMap             Properties;
+         OUString                Contents;
+         Element*                ContainedElement;
+         std::vector<sal_Int32>  SubStyles;
+
+         /// Not part of hashCode() or operator==; a default constructed entry is a sub style.
+         bool                    IsSubStyle;
+
+         HashedStyle() : ContainedElement( nullptr ), IsSubStyle( true ) {}
+
+         /// XOR-combines the hashes of name, properties, contents, element pointer and sub style ids.
+         size_t hashCode() const
+         {
+             size_t nRet = std::accumulate(Properties.begin(), Properties.end(), size_t(Name.hashCode()),
+                     [](const size_t& sum, const PropertyMap::value_type& rEntry) {
+                         return sum ^ size_t(rEntry.first.hashCode()) ^ size_t(rEntry.second.hashCode());
+                     });
+             nRet ^= size_t(Contents.hashCode());
+             nRet ^= reinterpret_cast<size_t>(ContainedElement);
+             for( size_t n = 0; n < SubStyles.size(); ++n )
+                 nRet ^= size_t(SubStyles[n]);
+             return nRet;
+         }
+
+         /// Member-wise comparison over exactly the members that feed hashCode().
+         bool operator==(const HashedStyle& rRight) const
+         {
+             // std::vector's operator== already checks size and elements
+             return Name == rRight.Name
+                 && Properties == rRight.Properties
+                 && Contents == rRight.Contents
+                 && ContainedElement == rRight.ContainedElement
+                 && SubStyles == rRight.SubStyles;
+         }
+     };
+
+     /// A stored style together with a reference counter (starts at 0).
+     struct RefCountedHashedStyle {
+         HashedStyle style;
+         sal_Int32 RefCount = 0;
+     };
+
+     struct StyleHash;
+     friend struct StyleHash;
+     /// Hash functor for the style -> id map; forwards to HashedStyle::hashCode().
+     struct StyleHash
+     {
+         size_t operator()( const StyleContainer::HashedStyle& rStyle ) const
+         {
+             return rStyle.hashCode();
+         }
+     };
+
+     struct StyleIdNameSort;
+     friend struct StyleIdNameSort;
+     /** Orders style ids by the Name of the style they refer to.
+      *
+      *  Ids that are not present in the map compare as "greater" than every
+      *  id that is present, so they end up at the back of a sorted range.
+      */
+     struct StyleIdNameSort
+     {
+         const std::unordered_map< sal_Int32, RefCountedHashedStyle >* m_pMap;
+
+         explicit StyleIdNameSort( const std::unordered_map< sal_Int32, RefCountedHashedStyle >* pMap ) :
+             m_pMap(pMap)
+         {}
+         // const: the comparison only reads through m_pMap, so the functor
+         // must also be usable where the comparator object is const
+         bool operator()( sal_Int32 nLeft, sal_Int32 nRight ) const
+         {
+             const auto left_it = m_pMap->find( nLeft );
+             const auto right_it = m_pMap->find( nRight );
+             if( left_it == m_pMap->end() )
+                 return false;
+             else if( right_it == m_pMap->end() )
+                 return true;
+             else
+                 return left_it->second.style.Name < right_it->second.style.Name;
+         }
+     };
+
+     sal_Int32                                                   m_nNextId;
+     std::unordered_map< sal_Int32, RefCountedHashedStyle >      m_aIdToStyle;
+     std::unordered_map< HashedStyle, sal_Int32, StyleHash >     m_aStyleToId;
+
+     void impl_emitStyle( sal_Int32 nStyleId,
+                          EmitContext& rContext,
+                          ElementTreeVisitor& rContainedElemVisitor );
+
+ public:
+     StyleContainer();
+
+     /// Writes the collected styles to rContext.
+     void emit( EmitContext& rContext,
+                ElementTreeVisitor& rContainedElemVisitor );
+
+     sal_Int32 impl_getStyleId( const Style& rStyle, bool bSubStyle );
+     /// Returns the id for rStyle, registered as a top-level (non-sub) style.
+     sal_Int32 getStyleId( const Style& rStyle )
+     { return impl_getStyleId( rStyle, false ); }
+     sal_Int32 getStandardStyleId( std::string_view rFamily );
+
+     // returns NULL for an invalid style id
+     const PropertyMap* getProperties( sal_Int32 nStyleId ) const;
+     sal_Int32 setProperties( sal_Int32 nStyleId, PropertyMap&& rNewProps );
+     OUString getStyleName( sal_Int32 nStyle ) const;
+ };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/treevisitorfactory.cxx b/sdext/source/pdfimport/tree/treevisitorfactory.cxx
new file mode 100644
index 0000000000..495bf0bcb4
--- /dev/null
+++ b/sdext/source/pdfimport/tree/treevisitorfactory.cxx
@@ -0,0 +1,111 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <treevisitorfactory.hxx>
+#include "writertreevisiting.hxx"
+#include "drawtreevisiting.hxx"
+
+namespace pdfi
+{
+ namespace {
+
+ /// Factory producing the Writer flavour of the three tree visitors
+ /// (optimizer, style collector, XML emitter).
+ struct WriterTreeVisitorFactory : public TreeVisitorFactory
+ {
+     WriterTreeVisitorFactory() {}
+
+     /// Visitor that optimizes the element tree for Writer output.
+     std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor& rProc) const override
+     {
+         return std::make_shared<WriterXmlOptimizer>(rProc);
+     }
+
+     /// Visitor that fills rStyles from the element tree.
+     std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor(
+         StyleContainer& rStyles,
+         PDFIProcessor&  rProc ) const override
+     {
+         return std::make_shared<WriterXmlFinalizer>(rStyles,rProc);
+     }
+
+     /// Visitor that writes the element tree through rEmitContext.
+     std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext& rEmitContext) const override
+     {
+         return std::make_shared<WriterXmlEmitter>(rEmitContext);
+     }
+ };
+
+ /// Factory producing the Draw visitors with the emitter configured for
+ /// an Impress document (DrawXmlEmitter::IMPRESS_DOC).
+ struct ImpressTreeVisitorFactory : public TreeVisitorFactory
+ {
+     ImpressTreeVisitorFactory() {}
+
+     /// Visitor that optimizes the element tree for drawing-style output.
+     std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor& rProc) const override
+     {
+         return std::make_shared<DrawXmlOptimizer>(rProc);
+     }
+
+     /// Visitor that fills rStyles from the element tree.
+     std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor(
+         StyleContainer& rStyles,
+         PDFIProcessor&  rProc ) const override
+     {
+         return std::make_shared<DrawXmlFinalizer>(rStyles,rProc);
+     }
+
+     /// Visitor that writes the element tree through rEmitContext as an Impress document.
+     std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext& rEmitContext) const override
+     {
+         return std::make_shared<DrawXmlEmitter>(rEmitContext, DrawXmlEmitter::IMPRESS_DOC);
+     }
+ };
+
+ /// Factory producing the Draw visitors with the emitter configured for
+ /// a Draw document (DrawXmlEmitter::DRAW_DOC).
+ struct DrawTreeVisitorFactory : public TreeVisitorFactory
+ {
+     DrawTreeVisitorFactory() {}
+
+     /// Visitor that optimizes the element tree for drawing-style output.
+     std::shared_ptr<ElementTreeVisitor> createOptimizingVisitor(PDFIProcessor& rProc) const override
+     {
+         return std::make_shared<DrawXmlOptimizer>(rProc);
+     }
+
+     /// Visitor that fills rStyles from the element tree.
+     std::shared_ptr<ElementTreeVisitor> createStyleCollectingVisitor(
+         StyleContainer& rStyles,
+         PDFIProcessor&  rProc ) const override
+     {
+         return std::make_shared<DrawXmlFinalizer>(rStyles,rProc);
+     }
+
+     /// Visitor that writes the element tree through rEmitContext as a Draw document.
+     std::shared_ptr<ElementTreeVisitor> createEmittingVisitor(EmitContext& rEmitContext) const override
+     {
+         return std::make_shared<DrawXmlEmitter>(rEmitContext, DrawXmlEmitter::DRAW_DOC);
+     }
+ };
+
+ }
+
+ /// Returns a newly allocated factory for the Writer visitors
+ /// (WriterXmlOptimizer / WriterXmlFinalizer / WriterXmlEmitter).
+ TreeVisitorFactorySharedPtr createWriterTreeVisitorFactory()
+ {
+ return std::make_shared<WriterTreeVisitorFactory>();
+ }
+ /// Returns a newly allocated factory for the Draw visitors whose emitter
+ /// is created with DrawXmlEmitter::IMPRESS_DOC.
+ TreeVisitorFactorySharedPtr createImpressTreeVisitorFactory()
+ {
+ return std::make_shared<ImpressTreeVisitorFactory>();
+ }
+ /// Returns a newly allocated factory for the Draw visitors whose emitter
+ /// is created with DrawXmlEmitter::DRAW_DOC.
+ TreeVisitorFactorySharedPtr createDrawTreeVisitorFactory()
+ {
+ return std::make_shared<DrawTreeVisitorFactory>();
+ }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx
new file mode 100644
index 0000000000..9ecce8f48b
--- /dev/null
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -0,0 +1,1347 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+#include <sal/log.hxx>
+#include <string_view>
+
+#include <pdfiprocessor.hxx>
+#include <xmlemitter.hxx>
+#include <pdfihelper.hxx>
+#include <imagecontainer.hxx>
+#include "style.hxx"
+#include "writertreevisiting.hxx"
+#include <genericelements.hxx>
+
+#include <basegfx/polygon/b2dpolypolygontools.hxx>
+#include <osl/diagnose.h>
+#include <com/sun/star/i18n/CharacterClassification.hpp>
+#include <com/sun/star/i18n/DirectionProperty.hpp>
+#include <comphelper/string.hxx>
+
+using namespace ::com::sun::star;
+using namespace ::com::sun::star::lang;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::uno;
+
+namespace pdfi
+{
+
+/** Returns the break iterator, creating it from the processor's component
+ *  context on the first call and reusing the cached reference afterwards.
+ */
+const Reference<XBreakIterator>& WriterXmlOptimizer::GetBreakIterator()
+{
+    if (mxBreakIter.is())
+        return mxBreakIter;
+
+    mxBreakIter = BreakIterator::create(m_rProcessor.m_xContext);
+    return mxBreakIter;
+}
+
+/** Returns the character classification service, creating it from the emit
+ *  context's component context on the first call and caching it.
+ */
+const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification()
+{
+    if ( mxCharClass.is() )
+        return mxCharClass;
+
+    // UNO_SET_THROW: an unset context reference is rejected right here
+    Reference< XComponentContext > xComponentContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW );
+    mxCharClass = CharacterClassification::create(xComponentContext);
+    return mxCharClass;
+}
+
+/** Emits a hyperlink and its children.
+ *
+ *  The link is written as draw:a when its first child is a DrawElement and
+ *  as text:a otherwise. A hyperlink without children produces no output.
+ */
+void WriterXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&   )
+{
+    if( elem.Children.empty() )
+        return;
+
+    const bool bWrapsDrawing =
+        dynamic_cast<DrawElement*>(elem.Children.front().get()) != nullptr;
+    const char* const pLinkTag = bWrapsDrawing ? "draw:a" : "text:a";
+
+    PropertyMap aLinkProps;
+    aLinkProps[ "xlink:type" ]               = "simple";
+    aLinkProps[ "xlink:href" ]               = elem.URI;
+    aLinkProps[ "office:target-frame-name" ] = "_blank";
+    aLinkProps[ "xlink:show" ]               = "new";
+
+    m_rEmitContext.rEmitter.beginTag( pLinkTag, aLinkProps );
+    for( auto child_it = elem.Children.begin();
+         child_it != elem.Children.end() && child_it->get() != &elem;
+         ++child_it )
+    {
+        (*child_it)->visitedBy( *this, child_it );
+    }
+    m_rEmitContext.rEmitter.endTag( pLinkTag );
+}
+
+/** Emits a text element as a text:span.
+ *
+ *  Spaces (U+0020, U+00A0) become <text:s text:c="1"/>, tabs become
+ *  <text:tab/>, every other UTF-16 unit is written as character data.
+ *  If a right-to-left character is found, mirrored characters are
+ *  substituted and the code points are reversed before emitting.
+ *  Child elements are emitted inside the span, after the text.
+ *  An element with empty text produces no output.
+ */
+void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&   )
+{
+    if( elem.Text.isEmpty() )
+        return;
+
+    const sal_Unicode strSpace = 0x0020;
+    const sal_Unicode strNbSpace = 0x00A0;
+    const sal_Unicode tabSpace = 0x0009;
+
+    // attributes of the enclosing span only
+    PropertyMap aSpanProps;
+    if( elem.StyleId != -1 )
+    {
+        aSpanProps[ OUString( "text:style-name" ) ] =
+            m_rEmitContext.rStyles.getStyleName( elem.StyleId );
+    }
+
+    OUString str(elem.Text.toString());
+
+    // Check for RTL
+    // NOTE(review): the scan starts at index 1, so the first character never
+    // triggers RTL handling on its own - confirm whether that is intended.
+    bool isRTL = false;
+    Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() );
+    if( xCC.is() )
+    {
+        for(int i=1; i< elem.Text.getLength(); i++)
+        {
+            i18n::DirectionProperty nType = static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i ));
+            if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT           ||
+                 nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC    ||
+                 nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING ||
+                 nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE
+                )
+            {
+                isRTL = true;
+                break; // one RTL character decides, no need to classify the rest
+            }
+        }
+    }
+
+    if (isRTL)  // If so, reverse string
+    {
+        // First, produce mirrored-image for each code point which has the Bidi_Mirrored property.
+        str = PDFIProcessor::SubstituteBidiMirrored(str);
+        // Then, reverse the code points in the string, in backward order.
+        str = ::comphelper::string::reverseCodePoints(str);
+    }
+
+    m_rEmitContext.rEmitter.beginTag( "text:span", aSpanProps );
+
+    // <text:s> carries only its repeat count and <text:tab> no attribute at
+    // all; reusing the span's map would leak text:style-name (and text:c)
+    // onto elements that do not take them.
+    PropertyMap aSpaceProps;
+    aSpaceProps[ "text:c" ] = "1";
+    const PropertyMap aNoProps;
+
+    // iterate over the string that is actually indexed
+    const sal_Int32 nLength = str.getLength();
+    for (sal_Int32 i = 0; i < nLength; i++)
+    {
+        const sal_Unicode strToken = str[i];
+        if (strToken == strSpace || strToken == strNbSpace)
+        {
+            m_rEmitContext.rEmitter.beginTag("text:s", aSpaceProps);
+            m_rEmitContext.rEmitter.endTag("text:s");
+        }
+        else if (strToken == tabSpace)
+        {
+            m_rEmitContext.rEmitter.beginTag("text:tab", aNoProps);
+            m_rEmitContext.rEmitter.endTag("text:tab");
+        }
+        else
+            m_rEmitContext.rEmitter.write(OUString(strToken));
+    }
+
+    auto this_it = elem.Children.begin();
+    while( this_it != elem.Children.end() && this_it->get() != &elem )
+    {
+        (*this_it)->visitedBy( *this, this_it );
+        ++this_it;
+    }
+
+    m_rEmitContext.rEmitter.endTag( "text:span" );
+}
+
+/** Emits a paragraph and its children: text:h for paragraphs typed as
+ *  Headline, text:p for all others. The style name is attached when the
+ *  paragraph has a style id.
+ */
+void WriterXmlEmitter::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&   )
+{
+    PropertyMap aParaProps;
+    if( elem.StyleId != -1 )
+    {
+        aParaProps[ "text:style-name" ] = m_rEmitContext.rStyles.getStyleName( elem.StyleId );
+    }
+
+    const char* const pParaTag =
+        ( elem.Type == ParagraphElement::Headline ) ? "text:h" : "text:p";
+
+    m_rEmitContext.rEmitter.beginTag( pParaTag, aParaProps );
+    for( auto child_it = elem.Children.begin();
+         child_it != elem.Children.end() && child_it->get() != &elem;
+         ++child_it )
+    {
+        (*child_it)->visitedBy( *this, child_it );
+    }
+    m_rEmitContext.rEmitter.endTag( pParaTag );
+}
+
+/** Fills rProps with the frame attributes of a draw element.
+ *
+ *  Written are: the anchor type (character/paragraph when a ParagraphElement
+ *  is found among the parents, page plus page number when a PageElement is
+ *  found first), z-index, style name, width and height, and the position.
+ *  The position is svg:x / svg:y when the element's graphics context has an
+ *  identity transformation, otherwise a draw:transform list. Elements marked
+ *  isCharacter get no position / translate entry.
+ *
+ *  @param rElem        element to describe
+ *  @param rProps       map receiving the attributes
+ *  @param rEmitContext supplies the style names and the graphics context
+ */
+void WriterXmlEmitter::fillFrameProps( DrawElement&       rElem,
+                                       PropertyMap&       rProps,
+                                       const EmitContext& rEmitContext )
+{
+    double rel_x = rElem.x, rel_y = rElem.y;
+
+    // find anchor type by recursing through parents
+    Element* pAnchor = &rElem;
+    ParagraphElement* pParaElt = nullptr;
+    PageElement* pPage = nullptr;
+    while ((pAnchor = pAnchor->Parent))
+    {
+        if ((pParaElt = dynamic_cast<ParagraphElement*>(pAnchor)))
+            break;
+        if ((pPage = dynamic_cast<PageElement*>(pAnchor)))
+            break;
+    }
+    if( pAnchor )
+    {
+        if (pParaElt)
+        {
+            rProps[ "text:anchor-type" ] = rElem.isCharacter
+                ? std::u16string_view(u"character") : std::u16string_view(u"paragraph");
+        }
+        else
+        {
+            assert(pPage); // guaranteed by the while loop above
+            rProps[ "text:anchor-type" ] = "page";
+            rProps[ "text:anchor-page-number" ] = OUString::number(pPage->PageNumber);
+        }
+        // position is expressed relative to the anchor
+        rel_x -= pAnchor->x;
+        rel_y -= pAnchor->y;
+    }
+
+    rProps[ "draw:z-index" ] = OUString::number( rElem.ZOrder );
+    rProps[ "draw:style-name"] = rEmitContext.rStyles.getStyleName( rElem.StyleId );
+    rProps[ "svg:width" ]   = convertPixelToUnitString( rElem.w );
+    rProps[ "svg:height" ]  = convertPixelToUnitString( rElem.h );
+
+    const GraphicsContext& rGC =
+        rEmitContext.rProcessor.getGraphicsContext( rElem.GCId );
+    if( rGC.Transformation.isIdentity() )
+    {
+        if( !rElem.isCharacter )
+        {
+            rProps[ "svg:x" ]       = convertPixelToUnitString( rel_x );
+            rProps[ "svg:y" ]       = convertPixelToUnitString( rel_y );
+        }
+    }
+    else
+    {
+        basegfx::B2DTuple aScale, aTranslation;
+        double fRotate, fShearX;
+
+        rGC.Transformation.decompose( aScale, aTranslation, fRotate, fShearX );
+
+        OUStringBuffer aBuf( 256 );
+
+        // TODO(F2): general transformation case missing; if implemented, note
+        // that ODF rotation is oriented the other way
+
+        // build transformation string; every entry after the first is
+        // preceded by a single blank
+        if (rElem.MirrorVertical)
+        {
+            // At some point, rElem.h may start arriving positive,
+            // so use robust adjusting math
+            rel_y -= std::abs(rElem.h);
+            if (!aBuf.isEmpty())
+                aBuf.append(' ');
+            aBuf.append("scale( 1.0 -1.0 )");
+        }
+        if( fShearX != 0.0 )
+        {
+            // separator was missing here, gluing skewX onto a preceding scale
+            if( !aBuf.isEmpty() )
+                aBuf.append( ' ' );
+            aBuf.append( "skewX( " + OUString::number(fShearX) + " )" );
+        }
+        if( fRotate != 0.0 )
+        {
+            if( !aBuf.isEmpty() )
+                aBuf.append( ' ' );
+            aBuf.append( "rotate( " + OUString::number(-fRotate) + " )" );
+
+        }
+        if( ! rElem.isCharacter )
+        {
+            if( !aBuf.isEmpty() )
+                aBuf.append( ' ' );
+            aBuf.append( "translate( "
+                + convertPixelToUnitString( rel_x )
+                + " "
+                + convertPixelToUnitString( rel_y )
+                + " )" );
+        }
+
+        rProps[ "draw:transform" ] = aBuf.makeStringAndClear();
+    }
+}
+
+/** Emits a frame as draw:frame around its children.
+ *
+ *  When the first child is a ParagraphElement the children are additionally
+ *  wrapped in draw:text-box. A frame without children produces no output.
+ */
+void WriterXmlEmitter::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&   )
+{
+    if( elem.Children.empty() )
+        return;
+
+    const bool bWrapInTextBox =
+        dynamic_cast<ParagraphElement*>(elem.Children.front().get()) != nullptr;
+
+    PropertyMap aFrameAttrs;
+    fillFrameProps( elem, aFrameAttrs, m_rEmitContext );
+
+    m_rEmitContext.rEmitter.beginTag( "draw:frame", aFrameAttrs );
+    if( bWrapInTextBox )
+        m_rEmitContext.rEmitter.beginTag( "draw:text-box", PropertyMap() );
+
+    for( auto child_it = elem.Children.begin();
+         child_it != elem.Children.end() && child_it->get() != &elem;
+         ++child_it )
+    {
+        (*child_it)->visitedBy( *this, child_it );
+    }
+
+    if( bWrapInTextBox )
+        m_rEmitContext.rEmitter.endTag( "draw:text-box" );
+    m_rEmitContext.rEmitter.endTag( "draw:frame" );
+}
+
+/** Emits a poly-polygon as an empty draw:path element.
+ *
+ *  After updating the element's geometry, every point and every used control
+ *  point of elem.PolyPoly is converted in place to 1/100 mm; the converted
+ *  polygon is exported as svg:d together with a matching svg:viewBox.
+ */
+void WriterXmlEmitter::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    elem.updateGeometry();
+    /* note:
+     *   aw recommends using 100dth of mm in all respects since the xml import
+     *   (a) is buggy (see issue 37213)
+     *   (b) is optimized for 100dth of mm and does not scale itself then,
+     *       this does not gain us speed but makes for smaller rounding errors since
+     *       the xml importer coordinates are integer based
+     */
+
+    // pixel -> 1/100 mm, applied to both coordinates of a point
+    const auto toHundredthMm = []( basegfx::B2DPoint& rPoint )
+    {
+        rPoint.setX( convPx2mmPrec2( rPoint.getX() )*100.0 );
+        rPoint.setY( convPx2mmPrec2( rPoint.getY() )*100.0 );
+    };
+
+    for (sal_uInt32 nPoly = 0; nPoly < elem.PolyPoly.count(); nPoly++)
+    {
+        basegfx::B2DPolygon aPolygon = elem.PolyPoly.getB2DPolygon( nPoly );
+
+        for ( sal_uInt32 nPoint = 0; nPoint < aPolygon.count(); nPoint++ )
+        {
+            basegfx::B2DPoint aNextCtrl;
+            basegfx::B2DPoint aPoint = aPolygon.getB2DPoint( nPoint );
+            basegfx::B2DPoint aPrevCtrl = aPolygon.getPrevControlPoint( nPoint );
+
+            // all reads and conversions happen before anything is written back
+            toHundredthMm( aPoint );
+
+            if ( aPolygon.isPrevControlPointUsed( nPoint ) )
+                toHundredthMm( aPrevCtrl );
+
+            if ( aPolygon.isNextControlPointUsed( nPoint ) )
+            {
+                aNextCtrl = aPolygon.getNextControlPoint( nPoint );
+                toHundredthMm( aNextCtrl );
+            }
+
+            aPolygon.setB2DPoint( nPoint, aPoint );
+
+            if ( aPolygon.isPrevControlPointUsed( nPoint ) )
+                aPolygon.setPrevControlPoint( nPoint, aPrevCtrl );
+
+            if ( aPolygon.isNextControlPointUsed( nPoint ) )
+                aPolygon.setNextControlPoint( nPoint, aNextCtrl );
+        }
+
+        elem.PolyPoly.setB2DPolygon( nPoly, aPolygon );
+    }
+
+    PropertyMap aPathProps;
+    fillFrameProps( elem, aPathProps, m_rEmitContext );
+    aPathProps[ "svg:viewBox" ] =
+        "0 0 "
+        + OUString::number(convPx2mmPrec2(elem.w)*100.0)
+        + " "
+        + OUString::number( convPx2mmPrec2(elem.h)*100.0 );
+    aPathProps[ "svg:d" ] = basegfx::utils::exportToSvgD( elem.PolyPoly, true, true, false );
+
+    m_rEmitContext.rEmitter.beginTag( "draw:path", aPathProps );
+    m_rEmitContext.rEmitter.endTag( "draw:path" );
+}
+
+/** Emits an image as draw:image containing the base64 encoded stream
+ *  inside office:binary-data. Neither tag carries attributes.
+ */
+void WriterXmlEmitter::visit( ImageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+    auto& rEmitter = m_rEmitContext.rEmitter;
+
+    rEmitter.beginTag( "draw:image", PropertyMap() );
+    rEmitter.beginTag( "office:binary-data", PropertyMap() );
+    m_rEmitContext.rImages.writeBase64EncodedStream( elem.Image, m_rEmitContext);
+    rEmitter.endTag( "office:binary-data" );
+    rEmitter.endTag( "draw:image" );
+}
+
+/** Emits the children of a page; the page itself gets no tag. The page
+ *  number is reported to the status indicator when one is set.
+ */
+void WriterXmlEmitter::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&   )
+{
+    if( m_rEmitContext.xStatusIndicator.is() )
+        m_rEmitContext.xStatusIndicator->setValue( elem.PageNumber );
+
+    for( auto child_it = elem.Children.begin();
+         child_it != elem.Children.end() && child_it->get() != &elem;
+         ++child_it )
+    {
+        (*child_it)->visitedBy( *this, child_it );
+    }
+}
+
+/** Emits the document body (office:body / office:text).
+ *
+ *  Two passes over the document's children: first the DrawElement children
+ *  of every page are emitted, then every direct child of the document that
+ *  is not itself a DrawElement.
+ */
+void WriterXmlEmitter::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&)
+{
+    m_rEmitContext.rEmitter.beginTag( "office:body", PropertyMap() );
+    m_rEmitContext.rEmitter.beginTag( "office:text", PropertyMap() );
+
+    // pass 1: emit only page anchored objects
+    // currently these are only DrawElement types
+    for( const auto& rxDocChild : elem.Children )
+    {
+        PageElement* pPage = dynamic_cast<PageElement*>(rxDocChild.get());
+        if( !pPage )
+            continue;
+
+        for( auto page_it = pPage->Children.begin(); page_it != pPage->Children.end(); ++page_it )
+        {
+            if( dynamic_cast<DrawElement*>(page_it->get()) == nullptr )
+                continue;
+            (*page_it)->visitedBy( *this, page_it );
+        }
+    }
+
+    // pass 2: do not emit page anchored objects, they are emitted before
+    // (must precede all pages in writer document) currently these are
+    // only DrawElement types
+    for( auto doc_it = elem.Children.begin(); doc_it != elem.Children.end(); ++doc_it )
+    {
+        if( dynamic_cast<DrawElement*>(doc_it->get()) != nullptr )
+            continue;
+        (*doc_it)->visitedBy( *this, doc_it );
+    }
+
+    m_rEmitContext.rEmitter.endTag( "office:text" );
+    m_rEmitContext.rEmitter.endTag( "office:body" );
+}
+
+
+/// The optimizer does nothing for hyperlink elements; the body is empty.
+void WriterXmlOptimizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+}
+
+/// The optimizer does nothing for individual text elements; the body is empty.
+void WriterXmlOptimizer::visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator&)
+{
+}
+
+/// A frame is not changed itself; the optimizer is only passed on to its children.
+void WriterXmlOptimizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+ elem.applyToChildren(*this);
+}
+
+/// The optimizer does nothing for image elements; the body is empty.
+void WriterXmlOptimizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+}
+
+/** Merges a filled path with an immediately following stroke of the same path.
+ *
+ *  The merge happens only if the next sibling is a PolyPolyElement with an
+ *  equal PolyPoly, both graphics contexts agree in blend mode, flatness,
+ *  transformation and clip, this element is a fill (PATH_FILL or
+ *  PATH_EOFILL) and the sibling is a PATH_STROKE. In that case this element
+ *  takes over the sibling's line attributes, action bits and children, and
+ *  the sibling is erased from the parent's child list.
+ *
+ *  @param elemIt position of elem in its parent's child list
+ */
+void WriterXmlOptimizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& elemIt )
+{
+    /* note: optimize two consecutive PolyPolyElements that
+     *  have the same path but one of which is a stroke while
+     *     the other is a fill
+     */
+    Element* const pParent = elem.Parent;
+    if( !pParent )
+        return;
+
+    // locate the sibling that directly follows elem
+    if( elemIt == pParent->Children.end() )
+        return;
+    auto sibling_it = elemIt;
+    ++sibling_it;
+    if( sibling_it == pParent->Children.end() )
+        return;
+
+    PolyPolyElement* pStroke = dynamic_cast<PolyPolyElement*>(sibling_it->get());
+    if( !pStroke || pStroke->PolyPoly != elem.PolyPoly )
+        return;
+
+    const GraphicsContext& rStrokeGC =
+        m_rProcessor.getGraphicsContext( pStroke->GCId );
+    const GraphicsContext& rFillGC =
+        m_rProcessor.getGraphicsContext( elem.GCId );
+
+    // both must be drawn under the same conditions ...
+    if( !(rFillGC.BlendMode      == rStrokeGC.BlendMode &&
+          rFillGC.Flatness       == rStrokeGC.Flatness &&
+          rFillGC.Transformation == rStrokeGC.Transformation &&
+          rFillGC.Clip           == rStrokeGC.Clip) )
+        return;
+    // ... and form a fill followed by a stroke
+    if( pStroke->Action != PATH_STROKE )
+        return;
+    if( elem.Action != PATH_FILL && elem.Action != PATH_EOFILL )
+        return;
+
+    // fill attributes from this element, line attributes from the stroke
+    GraphicsContext aCombinedGC( rFillGC );
+    aCombinedGC.LineColor  = rStrokeGC.LineColor;
+    aCombinedGC.DashArray  = rStrokeGC.DashArray;
+    aCombinedGC.MiterLimit = rStrokeGC.MiterLimit;
+    aCombinedGC.LineWidth  = rStrokeGC.LineWidth;
+    aCombinedGC.LineCap    = rStrokeGC.LineCap;
+    aCombinedGC.LineJoin   = rStrokeGC.LineJoin;
+    elem.GCId = m_rProcessor.getGCId( aCombinedGC );
+
+    elem.Action |= pStroke->Action;
+
+    elem.Children.splice( elem.Children.end(), pStroke->Children );
+    pParent->Children.erase( sibling_it );
+}
+
+/** Optimizes a paragraph and checks whether the paragraph before it is a headline.
+ *
+ *  After optimizing the text elements and visiting the children, the nearest
+ *  preceding ParagraphElement sibling is examined. It is typed as Headline if
+ *  it is single lined, ends less than two of its own line heights above this
+ *  paragraph, and either has a larger line height than this paragraph or
+ *  starts with bold text while this paragraph does not.
+ *
+ *  @param rParentIt position of elem in its parent's child list
+ */
+void WriterXmlOptimizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt)
+{
+    optimizeTextElements( elem );
+
+    elem.applyToChildren(*this);
+
+    if( !(elem.Parent && rParentIt != elem.Parent->Children.end()) )
+        return;
+
+    // font weights that count as bold for the headline heuristic
+    const auto isBoldWeight = []( const auto& rWeight )
+    {
+        return rWeight == u"600"  ||
+               rWeight == u"bold" ||
+               rWeight == u"800"  ||
+               rWeight == u"900";
+    };
+
+    // find if there is a previous paragraph that might be a heading for this one
+    auto prev = rParentIt;
+    ParagraphElement* pPrevPara = nullptr;
+    while( prev != elem.Parent->Children.begin() )
+    {
+        --prev;
+        pPrevPara = dynamic_cast< ParagraphElement* >(prev->get());
+        if( !pPrevPara )
+            continue; // skip non-paragraph siblings
+
+        /* What constitutes a heading ? current hints are:
+         * - one line only
+         * - not too far away from this paragraph (two heading height max ?)
+         * - font larger or bold
+         * this is of course incomplete
+         * FIXME: improve hints for heading
+         */
+        // Only the nearest preceding paragraph is examined, hence every
+        // path below leaves the loop.
+
+        // check for single line
+        if( !pPrevPara->isSingleLined( m_rProcessor ) )
+            break;
+
+        const double head_line_height = pPrevPara->getLineHeight( m_rProcessor );
+        if( !(pPrevPara->y + pPrevPara->h + 2*head_line_height > elem.y) )
+            break;
+
+        // check for larger font
+        if( head_line_height > elem.getLineHeight( m_rProcessor ) )
+        {
+            pPrevPara->Type = ParagraphElement::Headline;
+            break;
+        }
+
+        // check whether text of pPrevPara is bold (at least first text element)
+        // and this para is not bold (ditto)
+        TextElement* pPrevText = pPrevPara->getFirstTextChild();
+        TextElement* pThisText = elem.getFirstTextChild();
+        if( pPrevText && pThisText )
+        {
+            const FontAttributes& rPrevFont = m_rProcessor.getFont( pPrevText->FontId );
+            const FontAttributes& rThisFont = m_rProcessor.getFont( pThisText->FontId );
+            // a bold line above equally bold text is no headline: the
+            // candidate must be bold AND this paragraph must not be
+            if( isBoldWeight( rPrevFont.fontWeight ) &&
+                !isBoldWeight( rThisFont.fontWeight ) )
+            {
+                pPrevPara->Type = ParagraphElement::Headline;
+            }
+        }
+        break;
+    }
+}
+
+/** Optimizes one page, mainly by grouping its direct children into paragraphs.
+ *
+ * Order of work: report the page number to the status indicator (if any),
+ * resolve hyperlinks and font styles, sort the page's elements, then walk
+ * the children once. During the walk text elements, and draw elements that
+ * are judged to sit on a text line, are moved into the current
+ * ParagraphElement; a new paragraph is inserted where none fits. Afterwards
+ * the children are visited and header/footer detection runs.
+ *
+ * While walking, fCurLineHeight holds a running average of the heights of
+ * the nCurLineElements elements counted so far, and line_left/line_right
+ * track the horizontal extent used for the "short last line" test.
+ */
+void WriterXmlOptimizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+{
+ if( m_rProcessor.getStatusIndicator().is() )
+ m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber );
+
+ // resolve hyperlinks
+ elem.resolveHyperlinks();
+
+ elem.resolveFontStyles( m_rProcessor ); // underlines and such
+
+ // FIXME: until hyperlinks and font effects are adjusted for
+ // geometrical search handle them before sorting
+ PDFIProcessor::sortElements( &elem );
+
+ // find paragraphs in text
+ ParagraphElement* pCurPara = nullptr;
+ std::list< std::unique_ptr<Element> >::iterator page_element, next_page_element;
+ next_page_element = elem.Children.begin();
+ double fCurLineHeight = 0.0; // average height of text items in current para
+ int nCurLineElements = 0; // number of line contributing elements in current para
+ double line_left = elem.w, line_right = 0.0;
+ double column_width = elem.w*0.75; // estimate text width
+ // TODO: guess columns
+ while( next_page_element != elem.Children.end() )
+ {
+ // step the look-ahead iterator first; the element at page_element
+ // is handed to Element::setParent() at the end of the iteration
+ page_element = next_page_element++;
+ ParagraphElement* pPagePara = dynamic_cast<ParagraphElement*>(page_element->get());
+ if( pPagePara )
+ {
+ // an already existing paragraph becomes the current one
+ pCurPara = pPagePara;
+ // adjust line height and text items
+ fCurLineHeight = 0.0;
+ nCurLineElements = 0;
+ for( const auto& rxChild : pCurPara->Children )
+ {
+ TextElement* pTestText = rxChild->dynCastAsTextElement();
+ if( pTestText )
+ {
+ // incremental mean: (old mean * count + new height) / (count + 1)
+ fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pTestText->h)/double(nCurLineElements+1);
+ nCurLineElements++;
+ }
+ }
+ continue;
+ }
+
+ // a hyperlink is classified by its first child (draw or text)
+ HyperlinkElement* pLink = dynamic_cast<HyperlinkElement*>(page_element->get());
+ DrawElement* pDraw = dynamic_cast<DrawElement*>(page_element->get());
+ if( ! pDraw && pLink && ! pLink->Children.empty() )
+ pDraw = dynamic_cast<DrawElement*>(pLink->Children.front().get() );
+ if( pDraw )
+ {
+ // insert small drawing objects as character, else leave them page bound
+
+ bool bInsertToParagraph = false;
+ // first check if this is either inside the paragraph
+ if( pCurPara && pDraw->y < pCurPara->y + pCurPara->h )
+ {
+ // "small" means lower than 1.5 times the average line height
+ if( pDraw->h < fCurLineHeight * 1.5 )
+ {
+ bInsertToParagraph = true;
+ fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pDraw->h)/double(nCurLineElements+1);
+ nCurLineElements++;
+ // mark draw element as character
+ pDraw->isCharacter = true;
+ }
+ }
+ // or perhaps the draw element begins a new paragraph
+ else if( next_page_element != elem.Children.end() )
+ {
+ // look at the following element: a text, or the first text of a paragraph
+ TextElement* pText = (*next_page_element)->dynCastAsTextElement();
+ if( ! pText )
+ {
+ ParagraphElement* pPara = dynamic_cast<ParagraphElement*>(next_page_element->get());
+ if( pPara && ! pPara->Children.empty() )
+ pText = pPara->Children.front()->dynCastAsTextElement();
+ }
+ if( pText && // check there is a text
+ pDraw->h < pText->h*1.5 && // and it is approx the same height
+ // and either upper or lower edge of pDraw is inside text's vertical range
+ ( ( pDraw->y >= pText->y && pDraw->y <= pText->y+pText->h ) ||
+ ( pDraw->y+pDraw->h >= pText->y && pDraw->y+pDraw->h <= pText->y+pText->h )
+ )
+ )
+ {
+ bInsertToParagraph = true;
+ fCurLineHeight = pDraw->h;
+ nCurLineElements = 1;
+ line_left = pDraw->x;
+ line_right = pDraw->x + pDraw->w;
+ // begin a new paragraph
+ pCurPara = nullptr;
+ // mark draw element as character
+ pDraw->isCharacter = true;
+ }
+ }
+
+ if( ! bInsertToParagraph )
+ {
+ // the draw element stays a child of the page and ends the current paragraph
+ pCurPara = nullptr;
+ continue;
+ }
+ }
+
+ TextElement* pText = (*page_element)->dynCastAsTextElement();
+ if( ! pText && pLink && ! pLink->Children.empty() )
+ pText = pLink->Children.front()->dynCastAsTextElement();
+ if( pText )
+ {
+ // geometry is taken from the link when the text is wrapped in one
+ Element* pGeo = pLink ? static_cast<Element*>(pLink) :
+ static_cast<Element*>(pText);
+ if( pCurPara )
+ {
+ // there was already a text element, check for a new paragraph
+ if( nCurLineElements > 0 )
+ {
+ // if the new text is significantly distant from the paragraph
+ // begin a new paragraph
+ if( pGeo->y > pCurPara->y+pCurPara->h + fCurLineHeight*0.5 )
+ pCurPara = nullptr; // insert new paragraph
+ else if( pGeo->y > (pCurPara->y+pCurPara->h - fCurLineHeight*0.05) )
+ {
+ // new paragraph if either the last line of the paragraph
+ // was significantly shorter than the paragraph as a whole
+ if( (line_right - line_left) < pCurPara->w*0.75 )
+ pCurPara = nullptr;
+ // or the last line was significantly smaller than the column width
+ else if( (line_right - line_left) < column_width*0.75 )
+ pCurPara = nullptr;
+ }
+ }
+ }
+ // update line height/width
+ if( pCurPara )
+ {
+ fCurLineHeight = (fCurLineHeight*double(nCurLineElements) + pGeo->h)/double(nCurLineElements+1);
+ nCurLineElements++;
+ if( pGeo->x < line_left )
+ line_left = pGeo->x;
+ if( pGeo->x+pGeo->w > line_right )
+ line_right = pGeo->x+pGeo->w;
+ }
+ else
+ {
+ // first element of a new paragraph: restart the statistics
+ fCurLineHeight = pGeo->h;
+ nCurLineElements = 1;
+ line_left = pGeo->x;
+ line_right = pGeo->x + pGeo->w;
+ }
+ }
+
+ // move element to current paragraph
+ if( ! pCurPara ) // new paragraph, insert one
+ {
+ pCurPara = ElementFactory::createParagraphElement( nullptr );
+ // set parent
+ pCurPara->Parent = &elem;
+ //insert new paragraph before current element
+ page_element = elem.Children.insert( page_element, std::unique_ptr<Element>(pCurPara) );
+ // forward iterator to current element again
+ ++ page_element;
+ // update next_element which is now invalid
+ next_page_element = page_element;
+ ++ next_page_element;
+ }
+ // remember the raw pointer before the element is handed to setParent()
+ Element* pCurEle = page_element->get();
+ Element::setParent( page_element, pCurPara );
+ OSL_ENSURE( !pText || pCurEle == pText || pCurEle == pLink, "paragraph child list in disorder" );
+ if( pText || pDraw )
+ pCurPara->updateGeometryWith( pCurEle );
+ }
+
+ // process children
+ elem.applyToChildren(*this);
+
+ // find possible header and footer
+ checkHeaderAndFooter( elem );
+}
+
+/** Moves a detected header and/or footer paragraph out of the page's children.
+ *
+ * The first ParagraphElement child is taken as header candidate, the last
+ * one as footer candidate. A candidate that passes the tests below is moved
+ * into rElem.HeaderElement / rElem.FooterElement, its Parent is reset to
+ * nullptr and its (now empty) slot is erased from rElem.Children.
+ *
+ * @param rElem page to examine and modify
+ */
+void WriterXmlOptimizer::checkHeaderAndFooter( PageElement& rElem )
+{
+ /* indicators for a header:
+ * - single line paragraph at top of page (inside 15% page height)
+ * - at least lineheight above the next paragraph
+ *
+ * indicators for a footer likewise:
+ * - single line paragraph at bottom of page (inside 15% page height)
+ * - at least lineheight below the previous paragraph
+ */
+
+ auto isParagraphElement = [](std::unique_ptr<Element>& rxChild) -> bool {
+ return dynamic_cast<ParagraphElement*>(rxChild.get()) != nullptr;
+ };
+
+ // detect header
+ // Note: the following assumes that the pages' children have been
+ // sorted geometrically
+ auto it = std::find_if(rElem.Children.begin(), rElem.Children.end(), isParagraphElement);
+ if (it != rElem.Children.end())
+ {
+ ParagraphElement& rPara = dynamic_cast<ParagraphElement&>(**it);
+ // bottom edge within the top 15% of the page, and a single line
+ if( rPara.y+rPara.h < rElem.h*0.15 && rPara.isSingleLined( m_rProcessor ) )
+ {
+ // search forward for the next paragraph after the candidate
+ auto next_it = it;
+ ParagraphElement* pNextPara = nullptr;
+ while( ++next_it != rElem.Children.end() && pNextPara == nullptr )
+ {
+ pNextPara = dynamic_cast<ParagraphElement*>(next_it->get());
+ }
+ // the next paragraph must start below the candidate's top plus twice its height
+ if( pNextPara && pNextPara->y > rPara.y+rPara.h*2 )
+ {
+ rElem.HeaderElement = std::move(*it);
+ rPara.Parent = nullptr;
+ rElem.Children.erase( it );
+ }
+ }
+ }
+
+ // detect footer
+ auto rit = std::find_if(rElem.Children.rbegin(), rElem.Children.rend(), isParagraphElement);
+ if (rit == rElem.Children.rend())
+ return;
+
+ ParagraphElement& rPara = dynamic_cast<ParagraphElement&>(**rit);
+ // top edge within the bottom 15% of the page, and a single line
+ if( !(rPara.y > rElem.h*0.85 && rPara.isSingleLined( m_rProcessor )) )
+ return;
+
+ // search backwards (reverse iteration) for the paragraph before the candidate
+ std::list< std::unique_ptr<Element> >::reverse_iterator next_it = rit;
+ ParagraphElement* pNextPara = nullptr;
+ while( ++next_it != rElem.Children.rend() && pNextPara == nullptr )
+ {
+ pNextPara = dynamic_cast<ParagraphElement*>(next_it->get());
+ }
+ // the previous paragraph must start more than twice the candidate's height above it
+ if( pNextPara && pNextPara->y < rPara.y-rPara.h*2 )
+ {
+ rElem.FooterElement = std::move(*rit);
+ rPara.Parent = nullptr;
+ // std::next(rit).base() is the forward iterator of the element rit refers to
+ rElem.Children.erase( std::next(rit).base() );
+ }
+}
+
+ /** Merge adjacent text runs among the children of rParent.
+  *
+  * Walks rParent.Children pairwise (current / next). For two consecutive
+  * TextElements it
+  *  - inserts a space or removes a trailing hyphen at detected line breaks
+  *    and inserts a space at horizontal gaps (only for non-rotated text),
+  *  - concatenates the next element into the current one when font id and
+  *    fill colour are equal, erasing the next element from the list.
+  * HyperlinkElement children are processed recursively.
+  * If rParent is a ParagraphElement, its bRtl flag is set as soon as
+  * isComplex() reports complex text for a (merged) text element.
+  *
+  * @param rParent element whose direct children are optimized in place
+  */
+ void WriterXmlOptimizer::optimizeTextElements(Element& rParent)
+ {
+     if( rParent.Children.empty() ) // this should not happen
+     {
+         OSL_FAIL( "empty paragraph optimized" );
+         return;
+     }
+
+     // the line and space heuristics below only work in strictly horizontal
+     // mode, so remember whether the enclosing frame is rotated or skewed
+     bool bRotatedFrame = false;
+     FrameElement* pFrame = dynamic_cast<FrameElement*>(rParent.Parent);
+     if( pFrame )
+     {
+         const GraphicsContext& rFrameGC = m_rProcessor.getGraphicsContext( pFrame->GCId );
+         if( rFrameGC.isRotatedOrSkewed() )
+             bRotatedFrame = true;
+     }
+
+     // rParent does not change while iterating, so resolve its paragraph
+     // type once. The dynamic_cast alone decides whether we are inside a
+     // paragraph; the former additional strspn()/typeid().name() test relied
+     // on an implementation-defined string and added no information.
+     ParagraphElement* const pPara = dynamic_cast<ParagraphElement*>(&rParent);
+
+     // concatenate child elements with same font id
+     auto next = rParent.Children.begin();
+     auto it = next++;
+     while( next != rParent.Children.end() )
+     {
+         bool bConcat = false;
+         TextElement* pCur = (*it)->dynCastAsTextElement();
+         if( pCur )
+         {
+             TextElement* pNext = dynamic_cast<TextElement*>(next->get());
+             if (pPara && isComplex(GetBreakIterator(), pCur))
+                 pPara->bRtl = true;
+             if( pNext )
+             {
+                 const GraphicsContext& rCurGC = m_rProcessor.getGraphicsContext( pCur->GCId );
+                 const GraphicsContext& rNextGC = m_rProcessor.getGraphicsContext( pNext->GCId );
+
+                 // line and space optimization; works only in strictly horizontal mode
+
+                 if( !bRotatedFrame
+                     && ! rCurGC.isRotatedOrSkewed()
+                     && ! rNextGC.isRotatedOrSkewed()
+                     && ! pNext->Text.isEmpty()
+                     && pNext->Text[0] != ' '
+                     && ! pCur->Text.isEmpty()
+                     && pCur->Text[pCur->Text.getLength() - 1] != ' '
+                     )
+                 {
+                     // check for new line in paragraph
+                     if( pNext->y > pCur->y+pCur->h )
+                     {
+                         // new line begins
+                         // check whether a space should be inserted or a hyphen removed
+                         sal_Unicode aLastCode = pCur->Text[pCur->Text.getLength() - 1];
+                         if( aLastCode == '-'
+                             || aLastCode == 0x2010
+                             || (aLastCode >= 0x2012 && aLastCode <= 0x2015)
+                             || aLastCode == 0xff0d
+                             )
+                         {
+                             // cut a hyphen
+                             pCur->Text.setLength( pCur->Text.getLength()-1 );
+                         }
+                         // append a space unless there is a non breaking hyphen
+                         else if( aLastCode != 0x2011 )
+                         {
+                             pCur->Text.append( ' ' );
+                         }
+                     }
+                     else // we're continuing the same line
+                     {
+                         // check whether a space should be inserted
+                         // check for a small horizontal offset
+                         if( pCur->x + pCur->w + pNext->h*0.15 < pNext->x )
+                         {
+                             pCur->Text.append( ' ' );
+                         }
+                     }
+                 }
+                 // concatenate consecutive text elements unless there is a
+                 // font or text color change, leave a new span in that case
+                 if( pCur->FontId == pNext->FontId &&
+                     rCurGC.FillColor.Red == rNextGC.FillColor.Red &&
+                     rCurGC.FillColor.Green == rNextGC.FillColor.Green &&
+                     rCurGC.FillColor.Blue == rNextGC.FillColor.Blue &&
+                     rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha
+                     )
+                 {
+                     pCur->updateGeometryWith( pNext );
+                     if (pPara && pPara->bRtl)
+                     {
+                         // Tdf#152083: If RTL, reverse the text in pNext so that its correct order is
+                         // restored when the combined text is reversed in WriterXmlEmitter::visit.
+                         // aWord collects the pending run of non-space characters;
+                         // it is non-empty exactly when a reversed run still has
+                         // to be appended.
+                         OUString aWord;
+                         const OUString aNextText = pNext->Text.toString();
+                         for (sal_Int32 i=0; i < aNextText.getLength(); i++)
+                         {
+                             if (aNextText[i] == u' ')
+                             {   // Space char (e.g. the space as in " م") needs special treatment.
+                                 // First, append the space char to pCur.
+                                 pCur->Text.append(OUStringChar(aNextText[i]));
+                                 // Then append the pending run, reversed, if there is one.
+                                 if (!aWord.isEmpty())
+                                 {
+                                     aWord = ::comphelper::string::reverseCodePoints(aWord);
+                                     pCur->Text.append(aWord);
+                                     aWord = u""_ustr;
+                                 }
+                             }
+                             else
+                             {
+                                 aWord += OUStringChar(aNextText[i]);
+                             }
+                         }
+                         // Do the last append (nothing to do when no run is pending)
+                         if (!aWord.isEmpty())
+                         {
+                             aWord = ::comphelper::string::reverseCodePoints(aWord);
+                             pCur->Text.append(aWord);
+                         }
+                     }
+                     else
+                     {
+                         // append text to current element directly without reverse
+                         pCur->Text.append(pNext->Text);
+                     }
+                     // the merged text may have become complex only now
+                     if (pPara && isComplex(GetBreakIterator(), pCur))
+                         pPara->bRtl = true;
+                     // append eventual children to current element
+                     // and clear children (else the children just
+                     // appended to pCur would be destroyed)
+                     pCur->Children.splice( pCur->Children.end(), pNext->Children );
+                     // get rid of the now useless element
+                     rParent.Children.erase( next );
+                     bConcat = true;
+                 }
+             }
+         }
+         else if( dynamic_cast<HyperlinkElement*>(it->get()) )
+             optimizeTextElements( **it );
+         if( bConcat )
+         {
+             // 'next' was erased: stay on the merged element and re-derive
+             // its successor so it gets compared against the following child
+             next = it;
+             ++next;
+         }
+         else
+         {
+             ++it;
+             ++next;
+         }
+     }
+ }
+
+ // Document level: no optimization of its own, the optimizer is simply
+ // handed on via elem.applyToChildren(). The parent iterator is unused.
+ void WriterXmlOptimizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&)
+ {
+ elem.applyToChildren(*this);
+ }
+
+
+ // Builds the "graphic" style (stroke and fill properties) for a poly-polygon
+ // from its graphics context and stores the resulting style id in the element.
+ void WriterXmlFinalizer::visit( PolyPolyElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+ {
+     // xxx TODO copied from DrawElement
+     const GraphicsContext& rContext = m_rProcessor.getGraphicsContext(elem.GCId );
+
+     // outer style: only carries the family
+     PropertyMap aFamilyProps;
+     aFamilyProps[ "style:family" ] = "graphic";
+
+     // inner graphic properties: stroke first, fill second
+     PropertyMap aGraphicProps;
+     if( !(elem.Action & PATH_STROKE) )
+     {
+         aGraphicProps[ "draw:stroke" ] = "none";
+     }
+     else
+     {
+         double fScale = GetAverageTransformationScale(rContext.Transformation);
+         if (rContext.DashArray.size() >= 2)
+         {
+             // dashed line: register a separate dash style and reference it by name
+             PropertyMap aDashProps;
+             FillDashStyleProps(aDashProps, rContext.DashArray, fScale);
+             StyleContainer::Style aDashStyle("draw:stroke-dash"_ostr, std::move(aDashProps));
+
+             aGraphicProps[ "draw:stroke" ] = "dash";
+             aGraphicProps[ "draw:stroke-dash" ] =
+                 m_rStyleContainer.getStyleName(
+                     m_rStyleContainer.getStyleId(aDashStyle));
+         }
+         else
+         {
+             aGraphicProps[ "draw:stroke" ] = "solid";
+         }
+
+         aGraphicProps[ "svg:stroke-color" ] = getColorString(rContext.LineColor);
+         aGraphicProps[ "svg:stroke-width" ] = convertPixelToUnitString(rContext.LineWidth * fScale);
+         aGraphicProps[ "draw:stroke-linejoin" ] = rContext.GetLineJoinString();
+         aGraphicProps[ "svg:stroke-linecap" ] = rContext.GetLineCapString();
+     }
+
+     // TODO(F1): check whether stuff could be emulated by gradient/bitmap/hatch
+     if( !(elem.Action & (PATH_FILL | PATH_EOFILL)) )
+     {
+         aGraphicProps[ "draw:fill" ] = "none";
+     }
+     else
+     {
+         aGraphicProps[ "draw:fill" ] = "solid";
+         aGraphicProps[ "draw:fill-color" ] = getColorString( rContext.FillColor );
+     }
+
+     StyleContainer::Style aOuterStyle( "style:style"_ostr, std::move(aFamilyProps) );
+     StyleContainer::Style aInnerStyle( "style:graphic-properties"_ostr, std::move(aGraphicProps) );
+     aOuterStyle.SubStyles.push_back( &aInnerStyle );
+
+     elem.StyleId = m_rStyleContainer.getStyleId( aOuterStyle );
+ }
+
+ // Intentionally empty: the finalizer does nothing for hyperlink elements
+ // (no style is created and children are not visited from here).
+ void WriterXmlFinalizer::visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+ {
+ }
+
+ // Creates the "text" style for a text run from its font attributes and
+ // graphics context and stores the resulting style id in the element.
+ // Every font property is written for the western, asian and complex variants.
+ void WriterXmlFinalizer::visit( TextElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+ {
+     const FontAttributes& rAttrs = m_rProcessor.getFont( elem.FontId );
+
+     PropertyMap aStyleProps;
+     aStyleProps[ "style:family" ] = "text";
+
+     PropertyMap aTextProps;
+
+     // family name
+     // TODO: tdf#143095: use system font name rather than PSName
+     SAL_INFO("sdext.pdfimport", "The font used in xml is: " << rAttrs.familyName);
+     aTextProps[ "fo:font-family" ] = rAttrs.familyName;
+     aTextProps[ "style:font-family-asian" ] = rAttrs.familyName;
+     aTextProps[ "style:font-family-complex" ] = rAttrs.familyName;
+
+     // weight (bold)
+     aTextProps[ "fo:font-weight" ] = rAttrs.fontWeight;
+     aTextProps[ "style:font-weight-asian" ] = rAttrs.fontWeight;
+     aTextProps[ "style:font-weight-complex" ] = rAttrs.fontWeight;
+
+     // slant (italic) - only emitted when set
+     if( rAttrs.isItalic )
+     {
+         aTextProps[ "fo:font-style" ] = "italic";
+         aTextProps[ "style:font-style-asian" ] = "italic";
+         aTextProps[ "style:font-style-complex" ] = "italic";
+     }
+
+     // underline - only emitted when set
+     if( rAttrs.isUnderline )
+     {
+         aTextProps[ "style:text-underline-style" ] = "solid";
+         aTextProps[ "style:text-underline-width" ] = "auto";
+         aTextProps[ "style:text-underline-color" ] = "font-color";
+     }
+
+     // outline - only emitted when set
+     if( rAttrs.isOutline )
+     {
+         aTextProps[ "style:text-outline" ] = "true";
+     }
+
+     // size, converted from output device resolution to points
+     const OUString aPointSize = OUString::number( rAttrs.size*72/PDFI_OUTDEV_RESOLUTION ) + "pt";
+     aTextProps[ "fo:font-size" ] = aPointSize;
+     aTextProps[ "style:font-size-asian" ] = aPointSize;
+     aTextProps[ "style:font-size-complex" ] = aPointSize;
+
+     // color: outlined text is drawn with the line color, everything else
+     // with the fill color
+     const GraphicsContext& rContext = m_rProcessor.getGraphicsContext( elem.GCId );
+     aTextProps[ "fo:color" ] = getColorString( rAttrs.isOutline ? rContext.LineColor : rContext.FillColor );
+
+     StyleContainer::Style aOuterStyle( "style:style"_ostr, std::move(aStyleProps) );
+     StyleContainer::Style aInnerStyle( "style:text-properties"_ostr, std::move(aTextProps) );
+     aOuterStyle.SubStyles.push_back( &aInnerStyle );
+     elem.StyleId = m_rStyleContainer.getStyleId( aOuterStyle );
+ }
+
+ // Derives paragraph properties (centering, left indent, space below) from
+ // the paragraph's geometry relative to its parent, registers a paragraph
+ // style if any property was set, then visits the paragraph's children.
+ // rParentIt is the paragraph's position inside its parent's child list and
+ // is only used when the paragraph has a parent.
+ void WriterXmlFinalizer::visit( ParagraphElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& rParentIt )
+ {
+     PropertyMap aParagraphProps;
+
+     if( auto* const pParent = elem.Parent )
+     {
+         // reference area: the parent's extent, reduced by the page margins
+         // if the parent is a page
+         double fRefLeft = pParent->x;
+         double fRefWidth = pParent->w;
+
+         PageElement* pPage = dynamic_cast<PageElement*>(pParent);
+         if( pPage )
+         {
+             fRefLeft += pPage->LeftMargin;
+             fRefWidth -= pPage->LeftMargin+pPage->RightMargin;
+         }
+
+         // check for center alignment
+         // criterion: paragraph is small relative to parent and distributed around its center
+         bool bCentered = false;
+         if( elem.w < ( fRefWidth/2) )
+         {
+             // allow very small paragraphs to deviate a little more
+             // relative to parent's center
+             const double fTolerance = ( elem.w < fRefWidth/8 ) ? elem.w : elem.w/4;
+             const double fOwnCenter = elem.x+elem.w/2;
+             bCentered =
+                 fabs( fOwnCenter - ( fRefLeft+ fRefWidth/2) ) < fTolerance ||
+                 (pPage && fabs( fOwnCenter - (pPage->x + pPage->w/2) ) < fTolerance);
+             if( bCentered )
+                 aParagraphProps[ "fo:text-align" ] = "center";
+         }
+
+         // not centered but clearly shifted to the right: emit an indent
+         if( ! bCentered && elem.x > fRefLeft + fRefWidth/10 )
+         {
+             aParagraphProps[ "fo:margin-left" ] = OUString::number(convPx2mm( elem.x - fRefLeft )) + "mm";
+         }
+
+         // check whether to leave some space to next paragraph
+         // find whether there is a next paragraph
+         const ParagraphElement* pFollowing = nullptr;
+         for( auto scan = std::next(rParentIt);
+              scan != pParent->Children.end() && ! pFollowing;
+              ++scan )
+         {
+             pFollowing = dynamic_cast< const ParagraphElement* >(scan->get());
+         }
+
+         if( pFollowing )
+         {
+             // vertical gap between the bottom of this and the top of the next paragraph
+             const double fGap = pFollowing->y - (elem.y+elem.h);
+             if( fGap > convmm2Px( 10 ) )
+             {
+                 aParagraphProps[ "fo:margin-bottom" ] =
+                     OUString::number( convPx2mm( fGap ) ) + "mm";
+             }
+         }
+     }
+
+     // only register a style when at least one property was derived
+     if( ! aParagraphProps.empty() )
+     {
+         PropertyMap aFamilyProps;
+         aFamilyProps[ "style:family" ] = "paragraph";
+         StyleContainer::Style aOuterStyle( "style:style"_ostr, std::move(aFamilyProps) );
+         StyleContainer::Style aInnerStyle( "style:paragraph-properties"_ostr, std::move(aParagraphProps) );
+         aOuterStyle.SubStyles.push_back( &aInnerStyle );
+         elem.StyleId = m_rStyleContainer.getStyleId( aOuterStyle );
+     }
+
+     elem.applyToChildren(*this);
+ }
+
+ // Registers the fixed "graphic" style used for frames (no stroke, no fill,
+ // auto-growing, zero padding), stores its id in the element and then visits
+ // the frame's children.
+ void WriterXmlFinalizer::visit( FrameElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator&)
+ {
+     PropertyMap aFamilyProps;
+     aFamilyProps[ "style:family" ] = "graphic";
+
+     PropertyMap aFrameProps;
+
+     // invisible box
+     aFrameProps[ "draw:stroke" ] = "none";
+     aFrameProps[ "draw:fill" ] = "none";
+     // size follows content
+     aFrameProps[ "draw:auto-grow-height" ] = "true";
+     aFrameProps[ "draw:auto-grow-width" ] = "true";
+     // content anchored top-left
+     aFrameProps[ "draw:textarea-horizontal-align" ] = "left";
+     aFrameProps[ "draw:textarea-vertical-align" ] = "top";
+     // no minimum size
+     aFrameProps[ "fo:min-height"] = "0cm";
+     aFrameProps[ "fo:min-width"] = "0cm";
+     // no padding on any side
+     aFrameProps[ "fo:padding-top" ] = "0cm";
+     aFrameProps[ "fo:padding-left" ] = "0cm";
+     aFrameProps[ "fo:padding-right" ] = "0cm";
+     aFrameProps[ "fo:padding-bottom" ] = "0cm";
+
+     StyleContainer::Style aOuterStyle( "style:style"_ostr, std::move(aFamilyProps) );
+     StyleContainer::Style aInnerStyle( "style:graphic-properties"_ostr, std::move(aFrameProps) );
+     aOuterStyle.SubStyles.push_back( &aInnerStyle );
+
+     elem.StyleId = m_rStyleContainer.getStyleId( aOuterStyle );
+     elem.applyToChildren(*this);
+ }
+
+ // Intentionally empty: the finalizer does nothing for image elements.
+ void WriterXmlFinalizer::visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& )
+ {
+ }
+
+ // Marks rElem as the paragraph that switches to the given master page:
+ // its style gets "style:family" = "paragraph" and
+ // "style:master-page-name" = rMasterPageName. Properties of an already
+ // assigned style are kept; otherwise a new style is registered.
+ void WriterXmlFinalizer::setFirstOnPage( ParagraphElement& rElem,
+                                          StyleContainer& rStyles,
+                                          const OUString& rMasterPageName )
+ {
+     const bool bHasStyle = rElem.StyleId != -1;
+
+     // start from the existing style's properties, if there are any
+     PropertyMap aMerged;
+     if( bHasStyle )
+     {
+         if( const PropertyMap* pExisting = rStyles.getProperties( rElem.StyleId ) )
+             aMerged = *pExisting;
+     }
+
+     aMerged[ "style:family" ] = "paragraph";
+     aMerged[ "style:master-page-name" ] = rMasterPageName;
+
+     if( !bHasStyle )
+     {
+         // no style yet: register a fresh one
+         StyleContainer::Style aNewStyle( "style:style"_ostr, std::move(aMerged) );
+         rElem.StyleId = rStyles.getStyleId( aNewStyle );
+         return;
+     }
+
+     // existing style: replace its properties, the container returns the id to use
+     rElem.StyleId = rStyles.setProperties( rElem.StyleId, std::move(aMerged) );
+ }
+
+ // Finalizes one page:
+ //  1. reports progress to the status indicator (if any),
+ //  2. derives the page margins from the paragraph children (plus header
+ //     and footer), sanitizes them and stores them back in the element,
+ //  3. registers a page layout style and a master page style (including
+ //     header/footer sub styles when present),
+ //  4. creates the styles of all children,
+ //  5. makes the first paragraph on the page reference the master page,
+ //     inserting an empty paragraph if the page has none.
+ void WriterXmlFinalizer::visit( PageElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+ {
+ if( m_rProcessor.getStatusIndicator().is() )
+ m_rProcessor.getStatusIndicator()->setValue( elem.PageNumber );
+
+ // transform from pixel to mm
+ double page_width = convPx2mm( elem.w ), page_height = convPx2mm( elem.h );
+
+ // calculate page margins out of the relevant children (paragraphs)
+ // left/top start at the page extent and are minimized by the loop below
+ elem.TopMargin = elem.h;
+ elem.BottomMargin = 0;
+ elem.LeftMargin = elem.w;
+ elem.RightMargin = 0;
+ // first element should be a paragraph
+ ParagraphElement* pFirstPara = nullptr;
+ for( const auto& rxChild : elem.Children )
+ {
+ if( dynamic_cast<ParagraphElement*>( rxChild.get() ) )
+ {
+ if( rxChild->x < elem.LeftMargin )
+ elem.LeftMargin = rxChild->x;
+ if( rxChild->y < elem.TopMargin )
+ elem.TopMargin = rxChild->y;
+ // NOTE(review): RightMargin/BottomMargin start at 0, so the two tests
+ // below only fire for a paragraph reaching beyond the page extent and
+ // then yield a non-positive value (clamped to 0 further down). It looks
+ // like right/bottom margins are never derived from paragraphs lying
+ // inside the page - confirm whether that is intended.
+ if( rxChild->x + rxChild->w > elem.w - elem.RightMargin )
+ elem.RightMargin = elem.w - (rxChild->x + rxChild->w);
+ if( rxChild->y + rxChild->h > elem.h - elem.BottomMargin )
+ elem.BottomMargin = elem.h - (rxChild->y + rxChild->h);
+ if( ! pFirstPara )
+ pFirstPara = dynamic_cast<ParagraphElement*>( rxChild.get() );
+ }
+ }
+ // header and footer are no longer children of the page, account for them separately
+ if( elem.HeaderElement && elem.HeaderElement->y < elem.TopMargin )
+ elem.TopMargin = elem.HeaderElement->y;
+ if( elem.FooterElement && elem.FooterElement->y+elem.FooterElement->h > elem.h - elem.BottomMargin )
+ elem.BottomMargin = elem.h - (elem.FooterElement->y + elem.FooterElement->h);
+
+ // transform margins to mm
+ double left_margin = convPx2mm( elem.LeftMargin );
+ double right_margin = convPx2mm( elem.RightMargin );
+ double top_margin = convPx2mm( elem.TopMargin );
+ double bottom_margin = convPx2mm( elem.BottomMargin );
+ if( ! pFirstPara )
+ {
+ // use default page margins
+ left_margin = 10;
+ right_margin = 10;
+ top_margin = 10;
+ bottom_margin = 10;
+ }
+
+ // round left/top margin down to whole mm (rounding mode is Floor)
+ left_margin = rtl_math_round( left_margin, 0, rtl_math_RoundingMode_Floor );
+ top_margin = rtl_math_round( top_margin, 0, rtl_math_RoundingMode_Floor );
+ // round (fuzzy) right/bottom margin down: to whole cm (decimal place -1)
+ // when at least 10mm, otherwise to whole mm
+ right_margin = rtl_math_round( right_margin, right_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor );
+ bottom_margin = rtl_math_round( bottom_margin, bottom_margin >= 10 ? -1 : 0, rtl_math_RoundingMode_Floor );
+
+ // set reasonable default in case of way too large margins
+ // e.g. no paragraph case
+ if( left_margin > page_width/2.0 - 10 )
+ left_margin = 10;
+ if( right_margin > page_width/2.0 - 10 )
+ right_margin = 10;
+ if( top_margin > page_height/2.0 - 10 )
+ top_margin = 10;
+ if( bottom_margin > page_height/2.0 - 10 )
+ bottom_margin = 10;
+
+ // catch the weird cases
+ if( left_margin < 0 )
+ left_margin = 0;
+ if( right_margin < 0 )
+ right_margin = 0;
+ if( top_margin < 0 )
+ top_margin = 0;
+ if( bottom_margin < 0 )
+ bottom_margin = 0;
+
+ // widely differing margins are unlikely to be correct
+ if( right_margin > left_margin*1.5 )
+ right_margin = left_margin;
+
+ // store the sanitized margins back in pixel units
+ elem.LeftMargin = convmm2Px( left_margin );
+ elem.RightMargin = convmm2Px( right_margin );
+ elem.TopMargin = convmm2Px( top_margin );
+ elem.BottomMargin = convmm2Px( bottom_margin );
+
+ // build the page layout style (page size, orientation, margins)
+ PropertyMap aPageProps;
+ PropertyMap aPageLayoutProps;
+ aPageLayoutProps[ "fo:page-width" ] = unitMMString( page_width );
+ aPageLayoutProps[ "fo:page-height" ] = unitMMString( page_height );
+ aPageLayoutProps[ "style:print-orientation" ]
+ = elem.w < elem.h ? std::u16string_view(u"portrait") : std::u16string_view(u"landscape");
+ aPageLayoutProps[ "fo:margin-top" ] = unitMMString( top_margin );
+ aPageLayoutProps[ "fo:margin-bottom" ] = unitMMString( bottom_margin );
+ aPageLayoutProps[ "fo:margin-left" ] = unitMMString( left_margin );
+ aPageLayoutProps[ "fo:margin-right" ] = unitMMString( right_margin );
+ aPageLayoutProps[ "style:writing-mode" ]= "lr-tb";
+
+ StyleContainer::Style aStyle( "style:page-layout"_ostr, std::move(aPageProps));
+ StyleContainer::Style aSubStyle( "style:page-layout-properties"_ostr, std::move(aPageLayoutProps));
+ aStyle.SubStyles.push_back(&aSubStyle);
+ sal_Int32 nPageStyle = m_rStyleContainer.impl_getStyleId( aStyle, false );
+
+ // create master page
+ // NOTE(review): aPageProps was passed through std::move above and is
+ // written to again here; this presumably relies on the moved-from map
+ // being empty - confirm against PropertyMap's type.
+ OUString aMasterPageLayoutName = m_rStyleContainer.getStyleName( nPageStyle );
+ aPageProps[ "style:page-layout-name" ] = aMasterPageLayoutName;
+ StyleContainer::Style aMPStyle( "style:master-page"_ostr, std::move(aPageProps) );
+ StyleContainer::Style aHeaderStyle( "style:header"_ostr, PropertyMap() );
+ StyleContainer::Style aFooterStyle( "style:footer"_ostr, PropertyMap() );
+ if( elem.HeaderElement )
+ {
+ // header is not part of any child list, so a default-constructed iterator is passed
+ elem.HeaderElement->visitedBy( *this, std::list<std::unique_ptr<Element>>::iterator() );
+ aHeaderStyle.ContainedElement = elem.HeaderElement.get();
+ aMPStyle.SubStyles.push_back( &aHeaderStyle );
+ }
+ if( elem.FooterElement )
+ {
+ // same for the footer
+ elem.FooterElement->visitedBy( *this, std::list<std::unique_ptr<Element>>::iterator() );
+ aFooterStyle.ContainedElement = elem.FooterElement.get();
+ aMPStyle.SubStyles.push_back( &aFooterStyle );
+ }
+ elem.StyleId = m_rStyleContainer.impl_getStyleId( aMPStyle,false );
+
+
+ OUString aMasterPageName = m_rStyleContainer.getStyleName( elem.StyleId );
+
+ // create styles for children
+ elem.applyToChildren(*this);
+
+ // no paragraph or other elements before the first paragraph
+ if( ! pFirstPara )
+ {
+ // the page takes ownership of the new, empty paragraph
+ pFirstPara = ElementFactory::createParagraphElement( nullptr );
+ pFirstPara->Parent = &elem;
+ elem.Children.push_front( std::unique_ptr<Element>(pFirstPara) );
+ }
+ setFirstOnPage(*pFirstPara, m_rStyleContainer, aMasterPageName);
+ }
+
+ // Document level: no style of its own, the finalizer is simply handed on
+ // via elem.applyToChildren(). The parent iterator is unused.
+ void WriterXmlFinalizer::visit( DocumentElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
+ {
+ elem.applyToChildren(*this);
+ }
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx
new file mode 100644
index 0000000000..b0644dd481
--- /dev/null
+++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx
@@ -0,0 +1,113 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_WRITERTREEVISITING_HXX
+#define INCLUDED_SDEXT_SOURCE_PDFIMPORT_TREE_WRITERTREEVISITING_HXX
+
+#include <treevisiting.hxx>
+
+#include <pdfihelper.hxx>
+
+#include <com/sun/star/i18n/XBreakIterator.hpp>
+#include <com/sun/star/i18n/XCharacterClassification.hpp>
+
+namespace pdfi
+{
+ struct DrawElement;
+
+ /** Tree visitor for the optimization pass of the Writer import.
+  *
+  * Declares one visit() overload per element type plus two private
+  * helpers: optimizeTextElements() (works on the children of a parent
+  * element) and checkHeaderAndFooter() (works on a page).
+  */
+ class WriterXmlOptimizer : public ElementTreeVisitor
+ {
+ private:
+ // processor the visitor queries (stored by reference, not owned)
+ PDFIProcessor& m_rProcessor;
+ // break iterator handed out by GetBreakIterator()
+ // NOTE(review): presumably created lazily there - confirm in the .cxx
+ css::uno::Reference<css::i18n::XBreakIterator> mxBreakIter;
+ void optimizeTextElements(Element& rParent);
+ void checkHeaderAndFooter( PageElement& rElem );
+
+ public:
+ const css::uno::Reference<css::i18n::XBreakIterator>& GetBreakIterator();
+ explicit WriterXmlOptimizer(PDFIProcessor& rProcessor) :
+ m_rProcessor(rProcessor)
+ {}
+
+ // ElementTreeVisitor overrides; the iterator argument is the visited
+ // element's position in its parent's child list
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ /** Tree visitor for the finalization pass of the Writer import.
+  *
+  * Declares one visit() overload per element type; holds references to
+  * the style container and the processor it works with.
+  */
+ class WriterXmlFinalizer : public ElementTreeVisitor
+ {
+ private:
+ // both are stored by reference and not owned by the visitor
+ StyleContainer& m_rStyleContainer;
+ PDFIProcessor& m_rProcessor;
+
+ // static helper taking a paragraph, the style container and a master
+ // page name; needs no visitor state
+ static void setFirstOnPage( ParagraphElement& rElem,
+ StyleContainer& rStyles,
+ const OUString& rMasterPageName );
+
+ public:
+ explicit WriterXmlFinalizer(StyleContainer& rStyleContainer,
+ PDFIProcessor& rProcessor) :
+ m_rStyleContainer(rStyleContainer),
+ m_rProcessor(rProcessor)
+ {}
+
+ // ElementTreeVisitor overrides; the iterator argument is the visited
+ // element's position in its parent's child list
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+
+ /** Tree visitor for the emit pass of the Writer import.
+  *
+  * Declares one visit() overload per element type and works on the
+  * EmitContext passed to the constructor.
+  * NOTE(review): presumably writes the element tree as XML through that
+  * context - the implementation is not part of this header, confirm there.
+  */
+ class WriterXmlEmitter : public ElementTreeVisitor
+ {
+ private:
+ // character classification handed out by GetCharacterClassification()
+ css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass;
+ // emit context (stored by reference, not owned)
+ EmitContext& m_rEmitContext ;
+ // static helper: takes a draw element, a property map to write to and
+ // the emit context
+ static void fillFrameProps( DrawElement& rElem,
+ PropertyMap& rProps,
+ const EmitContext& rEmitContext );
+
+ public:
+ const css::uno::Reference<css::i18n::XCharacterClassification >& GetCharacterClassification();
+ explicit WriterXmlEmitter(EmitContext& rEmitContext) :
+ m_rEmitContext(rEmitContext)
+ {}
+
+ // ElementTreeVisitor overrides; the iterator argument is the visited
+ // element's position in its parent's child list
+ virtual void visit( HyperlinkElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( TextElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ParagraphElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( FrameElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PolyPolyElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( ImageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( PageElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ virtual void visit( DocumentElement&, const std::list< std::unique_ptr<Element> >::const_iterator& ) override;
+ };
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */