From 940b4d1848e8c70ab7642901a68594e8016caffc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 18:51:28 +0200 Subject: Adding upstream version 1:7.0.4. Signed-off-by: Daniel Baumann --- sw/qa/extras/htmlimport/htmlimport.cxx | 446 +++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 sw/qa/extras/htmlimport/htmlimport.cxx (limited to 'sw/qa/extras/htmlimport/htmlimport.cxx') diff --git a/sw/qa/extras/htmlimport/htmlimport.cxx b/sw/qa/extras/htmlimport/htmlimport.cxx new file mode 100644 index 000000000..fe323ca25 --- /dev/null +++ b/sw/qa/extras/htmlimport/htmlimport.cxx @@ -0,0 +1,446 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +/* Fixture for the HTML import tests. The constructor hands the path sw/qa/extras/htmlimport/data/ and the filter name HTML (StarWriter) to SwModelTestBase. The preTest() override sets the filter option xhtmlns=reqif-xhtml and forces the filter name when the test name contains ReqIf, and always returns nullptr. */ +class HtmlImportTest : public SwModelTestBase +{ + public: + HtmlImportTest() : SwModelTestBase("sw/qa/extras/htmlimport/data/", "HTML (StarWriter)") {} + private: + std::unique_ptr preTest(const char* /*filename*/) override + { + if (getTestName().indexOf("ReqIf") != -1) + { + setImportFilterOptions("xhtmlns=reqif-xhtml"); + // Bypass type detection, this is an XHTML fragment only. + setImportFilterName("HTML (StarWriter)"); + } + + return nullptr; + } +}; +/* Declares an import test on HtmlImportTest by forwarding to DECLARE_SW_IMPORT_TEST with nullptr as the third argument. */ +#define DECLARE_HTMLIMPORT_TEST(TestName, filename) DECLARE_SW_IMPORT_TEST(TestName, filename, nullptr, HtmlImportTest) +/* picture.html: expects two links in the link manager, removes both with Remove(0,2) and expects none left. The clipboard paste step is still a TODO, so the last assertion currently repeats the previous one. */ +DECLARE_HTMLIMPORT_TEST(testPictureImport, "picture.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + // The document contains two pictures stored as a link. 
+ sfx2::LinkManager& rLinkManager = pTextDoc->GetDocShell()->GetDoc()->GetEditShell()->GetLinkManager(); + CPPUNIT_ASSERT_EQUAL(size_t(2), rLinkManager.GetLinks().size()); + rLinkManager.Remove(0,2); + CPPUNIT_ASSERT_EQUAL(size_t(0), rLinkManager.GetLinks().size()); + + // TODO: Get the data into clipboard in html format and paste + + // But when pasting we don't want images to be linked. + CPPUNIT_ASSERT_EQUAL(size_t(0), rLinkManager.GetLinks().size()); +} +/* inlined_image.html: expects no links, a shape 1 named Image1 whose Graphic property is set and not of type EMPTY, and a graphic node whose graphic type is not NONE. NOTE(review): the node loop below has no upper bound and only stops at the first graphic node. */ +DECLARE_HTMLIMPORT_TEST(testInlinedImage, "inlined_image.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + // The document contains only one embedded picture inlined in img's src attribute. + + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + SwEditShell* pEditShell = pDoc->GetEditShell(); + CPPUNIT_ASSERT(pEditShell); + + // This was 1 before 3914a711060341345f15b83656457f90095f32d6 + const sfx2::LinkManager& rLinkManager = pEditShell->GetLinkManager(); + CPPUNIT_ASSERT_EQUAL(size_t(0), rLinkManager.GetLinks().size()); + + uno::Reference xShape = getShape(1); + uno::Reference const xNamed(xShape, uno::UNO_QUERY_THROW); + CPPUNIT_ASSERT_EQUAL(OUString("Image1"), xNamed->getName()); + + uno::Reference xGraphic = getProperty< uno::Reference >(xShape, "Graphic"); + CPPUNIT_ASSERT(xGraphic.is()); + CPPUNIT_ASSERT(xGraphic->getType() != graphic::GraphicType::EMPTY); + + for (int n = 0; ; n++) + { + SwNode* pNode = pDoc->GetNodes()[ n ]; + if (SwGrfNode *pGrfNode = pNode->GetGrfNode()) + { + // FIXME? For some reason without the fix in 72703173066a2db5c977d422ace + // I was getting GraphicType::NONE from SwEditShell::GetGraphicType() when + // running LibreOffice but cannot reproduce that in a unit test here. :-( + // So, this does not really test anything. 
+ CPPUNIT_ASSERT(pGrfNode->GetGrfObj().GetType() != GraphicType::NONE); + break; + } + } +} +/* PageAndParagraphFilled.html: expects no links, a bitmap fill with empty bitmap name and repeat mode on the page style named HTML, exactly one paragraph, and the same three fill properties on that paragraph. */ +DECLARE_HTMLIMPORT_TEST(testInlinedImagesPageAndParagraph, "PageAndParagraphFilled.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + + // The document contains embedded pictures inlined for PageBackground and + // ParagraphBackground, check for their existence after import + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + SwEditShell* pEditShell = pDoc->GetEditShell(); + CPPUNIT_ASSERT(pEditShell); + + // images are not linked, check for zero links + const sfx2::LinkManager& rLinkManager = pEditShell->GetLinkManager(); + CPPUNIT_ASSERT_EQUAL(size_t(0), rLinkManager.GetLinks().size()); + + // get the pageStyle where the PageBackgroundFill is defined. Caution: for + // HTML mode this is *not* called 'Default Style', but 'HTML'. Name is empty + // due to being loaded embedded. BitmapMode is repeat. + uno::Reference xPageProperties1(getStyles("PageStyles")->getByName("HTML"), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(drawing::FillStyle_BITMAP, getProperty(xPageProperties1, "FillStyle")); + CPPUNIT_ASSERT_EQUAL(OUString(), getProperty(xPageProperties1, "FillBitmapName")); + CPPUNIT_ASSERT_EQUAL(drawing::BitmapMode_REPEAT, getProperty(xPageProperties1, "FillBitmapMode")); + + // we should have one paragraph + const int nParagraphs = getParagraphs(); + CPPUNIT_ASSERT_EQUAL(1, nParagraphs); + + if(nParagraphs) + { + // get the paragraph + uno::Reference xPara = getParagraph(1); + uno::Reference< beans::XPropertySet > xParagraphProperties( xPara, uno::UNO_QUERY); + + // check for Bitmap FillStyle, name empty, repeat + CPPUNIT_ASSERT_EQUAL(drawing::FillStyle_BITMAP, getProperty(xParagraphProperties, "FillStyle")); + CPPUNIT_ASSERT_EQUAL(OUString(), getProperty(xParagraphProperties, "FillBitmapName")); + CPPUNIT_ASSERT_EQUAL(drawing::BitmapMode_REPEAT, getProperty(xParagraphProperties, "FillBitmapMode")); + } +} + 
+DECLARE_HTMLIMPORT_TEST(testListStyleType, "list-style.html") +{ /* list-style.html: level 0 of the numbering rules of paragraph 4 must contain a BulletChar property equal to U+E009, and level 0 of paragraph 14 must contain NumberingType CHARS_LOWER_LETTER. The test fails if either property is missing. NOTE(review): the printf below looks like leftover debug output. */ + // check unnumbered list style - should be type circle here + uno::Reference< beans::XPropertySet > xParagraphProperties(getParagraph(4), + uno::UNO_QUERY); + uno::Reference xLevels( + xParagraphProperties->getPropertyValue("NumberingRules"), uno::UNO_QUERY); + uno::Sequence aProps; + xLevels->getByIndex(0) >>= aProps; // 1st level + + bool bBulletFound=false; + for (beans::PropertyValue const & rProp : std::as_const(aProps)) + { + if (rProp.Name == "BulletChar") + { + // should be 'o'. + CPPUNIT_ASSERT_EQUAL(OUString(u"\uE009"), rProp.Value.get()); + bBulletFound = true; + break; + } + } + CPPUNIT_ASSERT_MESSAGE("no BulletChar property found for para 4", bBulletFound); + + // check numbered list style - should be type lower-alpha here + xParagraphProperties.set(getParagraph(14), + uno::UNO_QUERY); + xLevels.set(xParagraphProperties->getPropertyValue("NumberingRules"), + uno::UNO_QUERY); + xLevels->getByIndex(0) >>= aProps; // 1st level + + for (beans::PropertyValue const & rProp : std::as_const(aProps)) + { + if (rProp.Name == "NumberingType") + { + printf("style is %d\n", rProp.Value.get()); + // is lower-alpha in input, translates into chars_lower_letter here + CPPUNIT_ASSERT_EQUAL(style::NumberingType::CHARS_LOWER_LETTER, + rProp.Value.get()); + return; + } + } + CPPUNIT_FAIL("no NumberingType property found for para 14"); +} +/* meta-ISO8601-dates.html: compares the creation and modification dates reported by the document properties with fixed expected DateTime values. */ +DECLARE_HTMLIMPORT_TEST(testMetaIsoDates, "meta-ISO8601-dates.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDocShell* pDocShell(pTextDoc->GetDocShell()); + uno::Reference xDocProps; + + CPPUNIT_ASSERT(pDocShell); + uno::Reference xDPS(pDocShell->GetModel(), uno::UNO_QUERY); + xDocProps.set(xDPS->getDocumentProperties()); + + // get the document properties + CPPUNIT_ASSERT(xDocProps.is()); + DateTime aCreated(xDocProps->getCreationDate()); // in the new format + DateTime 
aModified(xDocProps->getModificationDate()); // in the legacy format (what LibreOffice used to write) + + CPPUNIT_ASSERT_EQUAL(DateTime(Date(7, 5, 2017), tools::Time(12, 34, 3, 921000000)), aCreated); + CPPUNIT_ASSERT_EQUAL(DateTime(Date(8, 5, 2017), tools::Time(12, 47, 0, 386000000)), aModified); +} +/* image-width-auto.html: takes the RES_TXTATR_FLYCNT attribute at character 0 of the text node under the cursor and expects the frame size of its frame format to be Size(1835, 560). */ +DECLARE_HTMLIMPORT_TEST(testImageWidthAuto, "image-width-auto.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwTextAttr const*const pAttr(pTextDoc->GetDocShell()->GetDoc()->GetEditShell()-> + GetCursor()->GetNode().GetTextNode()->GetTextAttrForCharAt(0, RES_TXTATR_FLYCNT)); + CPPUNIT_ASSERT(pAttr); + SwFrameFormat const*const pFmt(pAttr->GetFlyCnt().GetFrameFormat()); + SwFormatFrameSize const& rSize(pFmt->GetFormatAttr(RES_FRM_SIZE)); + CPPUNIT_ASSERT_EQUAL(Size(1835, 560), rSize.GetSize()); +} +/* image-lazy-read.html: the Graphic of shape 1 must not be available yet after import. */ +DECLARE_HTMLIMPORT_TEST(testImageLazyRead, "image-lazy-read.html") +{ + auto xGraphic = getProperty>(getShape(1), "Graphic"); + Graphic aGraphic(xGraphic); + // This failed, import loaded the graphic, it wasn't lazy-read. + CPPUNIT_ASSERT(!aGraphic.isAvailable()); +} +/* meta-changedby.html: the ModifiedBy document property must be Blah and the text field enumeration of the document must be empty. */ +DECLARE_HTMLIMPORT_TEST(testChangedby, "meta-changedby.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDocShell* pDocShell(pTextDoc->GetDocShell()); + uno::Reference xDocProps; + + CPPUNIT_ASSERT(pDocShell); + uno::Reference xDPS(pDocShell->GetModel(), uno::UNO_QUERY); + xDocProps.set(xDPS->getDocumentProperties()); + + // get the document properties + CPPUNIT_ASSERT(xDocProps.is()); + + // the doc's property ModifiedBy is set correctly, ... 
+ CPPUNIT_ASSERT_EQUAL(OUString("Blah"), xDocProps->getModifiedBy()); + + uno::Reference xTextFieldsSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xFieldsAccess(xTextFieldsSupplier->getTextFields()); + uno::Reference xFields(xFieldsAccess->createEnumeration()); + + // ...but there is no comment 'HTML: ' + CPPUNIT_ASSERT(!xFields->hasMoreElements()); +} +/* table_border_1px.html: expects exactly one table and checks the InnerLineWidth of the four borders of cells A1, B1, A2 and B2. Positive widths are expected for top, left and right of A1, top and right of B1, left and right of A2, and right of B2; all other borders must be 0. */ +DECLARE_HTMLIMPORT_TEST(testTableBorder1px, "table_border_1px.html") +{ + uno::Reference xTablesSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xTables(xTablesSupplier->getTextTables(), uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), xTables->getCount()); + uno::Reference xTable(xTables->getByIndex(0), uno::UNO_QUERY); + + table::BorderLine2 aBorder; + + uno::Reference xCellA1(xTable->getCellByName("A1"), uno::UNO_QUERY); + aBorder = getProperty(xCellA1, "TopBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell top border", aBorder.InnerLineWidth > 0); + aBorder = getProperty(xCellA1, "BottomBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell bottom border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellA1, "LeftBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell left border", aBorder.InnerLineWidth > 0); + aBorder = getProperty(xCellA1, "RightBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell right border", aBorder.InnerLineWidth > 0); + + uno::Reference xCellB1(xTable->getCellByName("B1"), uno::UNO_QUERY); + aBorder = getProperty(xCellB1, "TopBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell top border", aBorder.InnerLineWidth > 0); + aBorder = getProperty(xCellB1, "BottomBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell bottom border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellB1, "LeftBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell left border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellB1, "RightBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell right border", aBorder.InnerLineWidth > 0); + + 
uno::Reference xCellA2(xTable->getCellByName("A2"), uno::UNO_QUERY); + aBorder = getProperty(xCellA2, "TopBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell top border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellA2, "BottomBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell bottom border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellA2, "LeftBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell left border", aBorder.InnerLineWidth > 0); + aBorder = getProperty(xCellA2,"RightBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell right border", aBorder.InnerLineWidth > 0); + + uno::Reference xCellB2(xTable->getCellByName("B2"), uno::UNO_QUERY); + aBorder = getProperty(xCellB2, "TopBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell top border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellB2, "BottomBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell bottom border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellB2, "LeftBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected cell left border", sal_Int16(0), aBorder.InnerLineWidth); + aBorder = getProperty(xCellB2, "RightBorder"); + CPPUNIT_ASSERT_MESSAGE("Missing cell right border", aBorder.InnerLineWidth > 0); +} +/* outline-level.html: paragraph 1 must report OutlineLevel 1. */ +DECLARE_HTMLIMPORT_TEST(testOutlineLevel, "outline-level.html") +{ + // This was 0, HTML imported into Writer lost the outline numbering for + // Heading 1 styles. + CPPUNIT_ASSERT_EQUAL(static_cast(1), + getProperty(getParagraph(1), "OutlineLevel")); +} +/* reqif-br.xhtml: the text of paragraph 1 must start with aaa and bbb separated by a newline character. */ +DECLARE_HTMLIMPORT_TEST(testReqIfBr, "reqif-br.xhtml") +{ + // was not recognized as a line break from a ReqIf file. 
+ CPPUNIT_ASSERT(getParagraph(1)->getString().startsWith("aaa\nbbb")); +} +/* tdf80194_subscript.html: run 1 of paragraphs 1 and 2 must have CharEscapement 0; run 2 of paragraph 1 must be at -8 and run 2 of paragraph 2 at 33, each with a tolerance of 1, and the latter run must have a CharEscapementHeight above 70. */ +DECLARE_HTMLIMPORT_TEST(testTdf80194_subscript, "tdf80194_subscript.html") +{ + uno::Reference xPara = getParagraph(1); + CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.f, getProperty(getRun(xPara, 1), "CharEscapement"), 0); + // Most recently, the default subscript was 33%, which is much too large for a subscript. + // The original 8% (derived from a mathematical calculation) is much better in general, + // and for HTML was a better match when testing with firefox. + // DFLT_ESC_AUTO_SUB was tested, but HTML specs are pretty loose, and generally + // it exceeds the font ascent - so the formula-based-escapement is not appropriate. + CPPUNIT_ASSERT_DOUBLES_EQUAL( -8.f, getProperty(getRun(xPara, 2, "p"), "CharEscapement"), 1); + + xPara.set(getParagraph(2)); + CPPUNIT_ASSERT_DOUBLES_EQUAL( 0.f, getProperty(getRun(xPara, 1), "CharEscapement"), 0); + uno::Reference xRun (getRun(xPara, 2, "L")); + CPPUNIT_ASSERT_DOUBLES_EQUAL( 33.f, getProperty(xRun, "CharEscapement"), 1); + // HTML (although unspecified) tends to use a fairly large font. Definitely more than DFLT_ESC_PROP. + CPPUNIT_ASSERT( 70 < getProperty(xRun, "CharEscapementHeight")); +} +/* reqif-table.xhtml: expects three tables and a LineWidth of 18 on the top, bottom, left and right borders of cell A1 of the first table. */ +DECLARE_HTMLIMPORT_TEST(testReqIfTable, "reqif-table.xhtml") +{ + // to see this: soffice --infilter="HTML (StarWriter):xhtmlns=reqif-xhtml" sw/qa/extras/htmlimport/data/reqif-table.xhtml + // Load a table with xhtmlns=reqif-xhtml filter param. + uno::Reference xTablesSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xTables(xTablesSupplier->getTextTables(), + uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(static_cast(3), xTables->getCount()); + uno::Reference xTable(xTables->getByIndex(0), uno::UNO_QUERY); + uno::Reference xCell(xTable->getCellByName("A1"), uno::UNO_QUERY); + auto aBorder = getProperty(xCell, "TopBorder"); + // This was 0, tables had no borders, even if the default autoformat has + // borders and the markup allows no custom borders. 
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Top Border", static_cast(18), aBorder.LineWidth); + aBorder = getProperty(xCell, "BottomBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Bottom Border", static_cast(18), aBorder.LineWidth); + aBorder = getProperty(xCell, "LeftBorder"); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Left Border", static_cast(18), aBorder.LineWidth); + aBorder = getProperty(xCell, "RightBorder"); + // This was 0. Single column tables had no right border. tdf#115576 + CPPUNIT_ASSERT_EQUAL_MESSAGE("Right Border", static_cast(18), aBorder.LineWidth); +} +/* image-size.html: the size of shape 1 must equal Size(200, 400) in pixels converted by the default output device with MapUnit::Map100thMM. */ +DECLARE_HTMLIMPORT_TEST(testImageSize, "image-size.html") +{ + awt::Size aSize = getShape(1)->getSize(); + OutputDevice* pDevice = Application::GetDefaultDevice(); + Size aPixelSize(200, 400); + Size aExpected = pDevice->PixelToLogic(aPixelSize, MapMode(MapUnit::Map100thMM)); + + // This was 1997, i.e. a hardcoded default, we did not look at the image + // header when the HTML markup declared no size. + CPPUNIT_ASSERT_EQUAL(static_cast(aExpected.getWidth()), aSize.Width); + CPPUNIT_ASSERT_EQUAL(static_cast(aExpected.getHeight()), aSize.Height); +} +/* tdf122789.html: expects exactly one entry in GetSpzFrameFormats() and a width percent of 70 on its frame size. */ +DECLARE_HTMLIMPORT_TEST(testTdf122789, "tdf122789.html") +{ + SwXTextDocument* pTextDoc = dynamic_cast(mxComponent.get()); + CPPUNIT_ASSERT(pTextDoc); + SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc(); + const SwFrameFormats& rFormats = *pDoc->GetSpzFrameFormats(); + CPPUNIT_ASSERT_EQUAL(static_cast(1), rFormats.size()); + // This failed, the image had an absolute size, not a relative one. + CPPUNIT_ASSERT_EQUAL(static_cast(70), rFormats[0]->GetAttrSet().GetFrameSize().GetWidthPercent()); +} +/* reqif-page-style.xhtml: the PageStyleName property of paragraph 1 must be Standard. */ +DECLARE_HTMLIMPORT_TEST(testReqIfPageStyle, "reqif-page-style.xhtml") +{ + // Without the accompanying fix in place, this test would have failed with + // 'Expected: Standard, Actual : HTML'. + CPPUNIT_ASSERT_EQUAL(OUString("Standard"), + getProperty(getParagraph(1), "PageStyleName")); +} + +/// HTML import to the sw doc model tests. 
+class SwHtmlOptionsImportTest : public SwModelTestBase +{ /* Adds nothing to SwModelTestBase; the tests using this fixture load their documents explicitly via loadFromDesktop(). */ +}; +/* Directory of the test documents, passed to m_directories.getURLFromSrc() by the tests below. */ +char const DATA_DIRECTORY[] = "/sw/qa/extras/htmlimport/data/"; +/* Loads allowed-rtf-ole-mime-types.xhtml with the filter option xhtmlns=reqif-xhtml and AllowedRTFOLEMimeTypes set to test/rtf; expects exactly one embedded object and a non-empty xEmbeddedObject reference queried from its extended control object. */ +CPPUNIT_TEST_FIXTURE(SwHtmlOptionsImportTest, testAllowedRTFOLEMimeTypes) +{ + uno::Sequence aTypes = { OUString("test/rtf") }; + uno::Sequence aLoadProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")), + comphelper::makePropertyValue("AllowedRTFOLEMimeTypes", aTypes), + }; + OUString aURL + = m_directories.getURLFromSrc(DATA_DIRECTORY) + "allowed-rtf-ole-mime-types.xhtml"; + mxComponent = loadFromDesktop(aURL, "com.sun.star.text.TextDocument", aLoadProperties); + uno::Reference xSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xObjects(xSupplier->getEmbeddedObjects(), + uno::UNO_QUERY); + CPPUNIT_ASSERT_EQUAL(static_cast(1), xObjects->getCount()); + uno::Reference xObject(xObjects->getByIndex(0), + uno::UNO_QUERY); + CPPUNIT_ASSERT(xObject.is()); + uno::Reference xEmbeddedObject( + xObject->getExtendedControlOverEmbeddedObject(), uno::UNO_QUERY); + // Without the accompanying fix in place, this test would have failed, because the returned + // embedded object was a dummy one, which does not support in-place editing. + CPPUNIT_ASSERT(xEmbeddedObject.is()); +} +/* Loads hidden-textframe.html with the HTML (StarWriter) filter and expects getCount() of the draw page to be 0. */ +CPPUNIT_TEST_FIXTURE(SwHtmlOptionsImportTest, testHiddenTextframe) +{ + // Load HTML content into Writer, similar to HTML paste. + uno::Sequence aLoadProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + }; + OUString aURL + = m_directories.getURLFromSrc(DATA_DIRECTORY) + "hidden-textframe.html"; + mxComponent = loadFromDesktop(aURL, "com.sun.star.text.TextDocument", aLoadProperties); + + // Check the content of the draw page. 
+ uno::Reference xSupplier(mxComponent, uno::UNO_QUERY); + uno::Reference xDrawPage = xSupplier->getDrawPage(); + + // Without the accompanying fix in place, this test would have failed with: + // - Expected: 0 + // - Actual : 1 + // i.e. an unexpected text frame was created, covering the actual content. + CPPUNIT_ASSERT_EQUAL(static_cast(0), xDrawPage->getCount()); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ -- cgit v1.2.3