diff options
Diffstat (limited to 'svtools/qa/unit/testHtmlReader.cxx')
-rw-r--r-- | svtools/qa/unit/testHtmlReader.cxx | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/svtools/qa/unit/testHtmlReader.cxx b/svtools/qa/unit/testHtmlReader.cxx new file mode 100644 index 0000000000..5605060eab --- /dev/null +++ b/svtools/qa/unit/testHtmlReader.cxx @@ -0,0 +1,104 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + */ + +#include <cppunit/TestFixture.h> +#include <unotest/bootstrapfixturebase.hxx> +#include <svtools/parhtml.hxx> +#include <tools/ref.hxx> +#include <tools/stream.hxx> + +namespace +{ +/// Subclass of HTMLParser that can sense the import result. +class TestHTMLParser : public HTMLParser +{ +public: + TestHTMLParser(SvStream& rStream); + virtual void NextToken(HtmlTokenId nToken) override; + /// Make this public for test purposes. + using HTMLParser::SetNamespace; + + OUString m_aDocument; + int m_nLineBreakCount = 0; + OUString m_aCdata; +}; + +TestHTMLParser::TestHTMLParser(SvStream& rStream) + : HTMLParser(rStream) +{ +} + +void TestHTMLParser::NextToken(HtmlTokenId nToken) +{ + if (nToken == HtmlTokenId::TEXTTOKEN) + m_aDocument += aToken; + else if (nToken == HtmlTokenId::LINEBREAK) + ++m_nLineBreakCount; + else if (nToken == HtmlTokenId::CDATA) + m_aCdata = aToken; +} + +/// Tests HTMLParser. +class Test : public CppUnit::TestFixture +{ +}; + +CPPUNIT_TEST_FIXTURE(Test, testTdf114428) +{ + SvMemoryStream aStream; + OString aDocument("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<html>hello</html>"_ostr); + aStream.WriteBytes(aDocument.getStr(), aDocument.getLength()); + aStream.Seek(0); + + tools::SvRef<TestHTMLParser> xParser = new TestHTMLParser(aStream); + xParser->CallParser(); + + // This was '<?xml version="1.0" encoding="utf-8"?> hello', XML declaration + // was not ignored. + CPPUNIT_ASSERT_EQUAL(OUString("hello"), xParser->m_aDocument.trim()); +} + +CPPUNIT_TEST_FIXTURE(Test, testLineBreak) +{ + SvMemoryStream aStream; + OString aDocument("aaa<br></br>bbb"_ostr); + aStream.WriteBytes(aDocument.getStr(), aDocument.getLength()); + aStream.Seek(0); + + tools::SvRef<TestHTMLParser> xParser = new TestHTMLParser(aStream); + xParser->SetNamespace(u"reqif-xhtml"); + xParser->CallParser(); + + // This was 2, <br></br> was interpreted as 2 line breaks in XHTML mode. + CPPUNIT_ASSERT_EQUAL(1, xParser->m_nLineBreakCount); +} + +CPPUNIT_TEST_FIXTURE(Test, testCdata) +{ + // Given a document with CDATA: + SvMemoryStream aStream; + OString aDocument("A<![CDATA[B ü <]]>C"_ostr); + aStream.WriteBytes(aDocument.getStr(), aDocument.getLength()); + aStream.Seek(0); + + // When parsing that HTML: + tools::SvRef<TestHTMLParser> xParser = new TestHTMLParser(aStream); + xParser->CallParser(); + + // Then make sure that we get a cdata token with the correct content: + // Without the accompanying fix in place, this test would have failed with: + // - Expected: B ü < + // - Actual : + // i.e. the content inside CDATA was lost. + CPPUNIT_ASSERT_EQUAL(OUString("B ü <"), xParser->m_aCdata); +} +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |