summary refs log tree commit diff stats
path: root/svtools/qa/unit/testHtmlReader.cxx
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:06:44 +0000
commit ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /svtools/qa/unit/testHtmlReader.cxx
parent Initial commit. (diff)
download libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz
libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'svtools/qa/unit/testHtmlReader.cxx')
-rw-r--r-- svtools/qa/unit/testHtmlReader.cxx 104
1 files changed, 104 insertions, 0 deletions
diff --git a/svtools/qa/unit/testHtmlReader.cxx b/svtools/qa/unit/testHtmlReader.cxx
new file mode 100644
index 000000000..37f74e903
--- /dev/null
+++ b/svtools/qa/unit/testHtmlReader.cxx
@@ -0,0 +1,104 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <cppunit/TestFixture.h>
+#include <unotest/bootstrapfixturebase.hxx>
+#include <svtools/parhtml.hxx>
+#include <tools/ref.hxx>
+#include <tools/stream.hxx>
+
+namespace
+{
+/// HTMLParser subclass that records what the parser reports, so that the tests
+/// can inspect the import result afterwards.
+class TestHTMLParser : public HTMLParser
+{
+public:
+    /// @param rStream stream the HTML is read from; it is handed on to HTMLParser.
+    ///
+    /// Explicit, so that an SvStream never converts to a parser implicitly.
+    explicit TestHTMLParser(SvStream& rStream);
+    /// Override that collects text, line break and CDATA tokens into the members below.
+    virtual void NextToken(HtmlTokenId nToken) override;
+    /// Make this public for test purposes.
+    using HTMLParser::SetNamespace;
+
+    /// Concatenation of every text token seen so far.
+    OUString m_aDocument;
+    /// Number of line break tokens seen so far.
+    int m_nLineBreakCount = 0;
+    /// Content of the most recently seen CDATA token.
+    OUString m_aCdata;
+};
+
+/// Hands the stream over to the HTMLParser base class; nothing else is set up here.
+TestHTMLParser::TestHTMLParser(SvStream& rStream)
+    : HTMLParser(rStream)
+{
+}
+
+/// Records the tokens the tests care about; every other token id is ignored.
+void TestHTMLParser::NextToken(HtmlTokenId nToken)
+{
+    switch (nToken)
+    {
+        case HtmlTokenId::TEXTTOKEN:
+            // Text is appended, so it accumulates across tokens.
+            m_aDocument += aToken;
+            break;
+        case HtmlTokenId::LINEBREAK:
+            ++m_nLineBreakCount;
+            break;
+        case HtmlTokenId::CDATA:
+            // Assigned rather than appended: only the latest CDATA token is kept.
+            m_aCdata = aToken;
+            break;
+        default:
+            break;
+    }
+}
+
+/// Fixture shared by the HTMLParser tests in this file; it adds no state of its own.
+class Test : public CppUnit::TestFixture
+{
+};
+
+/// A leading XML declaration must not show up in the text the parser reports.
+CPPUNIT_TEST_FIXTURE(Test, testTdf114428)
+{
+    // Put the markup into an in-memory stream and rewind it for reading.
+    const OString aMarkup("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<html>hello</html>");
+    SvMemoryStream aInput;
+    aInput.WriteBytes(aMarkup.getStr(), aMarkup.getLength());
+    aInput.Seek(0);
+
+    tools::SvRef<TestHTMLParser> xReader(new TestHTMLParser(aInput));
+    xReader->CallParser();
+
+    // This was '<?xml version="1.0" encoding="utf-8"?> hello', XML declaration
+    // was not ignored.
+    const OUString aCollectedText = xReader->m_aDocument.trim();
+    CPPUNIT_ASSERT_EQUAL(OUString("hello"), aCollectedText);
+}
+
+/// With a namespace set, <br></br> must be reported as a single line break.
+CPPUNIT_TEST_FIXTURE(Test, testLineBreak)
+{
+    // Put the markup into an in-memory stream and rewind it for reading.
+    const OString aMarkup("aaa<br></br>bbb");
+    SvMemoryStream aInput;
+    aInput.WriteBytes(aMarkup.getStr(), aMarkup.getLength());
+    aInput.Seek(0);
+
+    // The namespace has to be set before parsing starts.
+    tools::SvRef<TestHTMLParser> xReader(new TestHTMLParser(aInput));
+    xReader->SetNamespace(u"reqif-xhtml");
+    xReader->CallParser();
+
+    // This was 2, <br></br> was interpreted as 2 line breaks in XHTML mode.
+    CPPUNIT_ASSERT_EQUAL(1, xReader->m_nLineBreakCount);
+}
+
+/// The content of a CDATA section must reach the parser's client unchanged.
+CPPUNIT_TEST_FIXTURE(Test, testCdata)
+{
+    // Given a document with CDATA:
+    const OString aMarkup("A<![CDATA[B &uuml; &lt;]]>C");
+    SvMemoryStream aInput;
+    aInput.WriteBytes(aMarkup.getStr(), aMarkup.getLength());
+    aInput.Seek(0);
+
+    // When parsing that HTML:
+    tools::SvRef<TestHTMLParser> xReader(new TestHTMLParser(aInput));
+    xReader->CallParser();
+
+    // Then make sure that we get a cdata token with the correct content:
+    // Without the accompanying fix in place, this test would have failed with:
+    // - Expected: B &uuml; &lt;
+    // - Actual :
+    // i.e. the content inside CDATA was lost.
+    const OUString aExpectedCdata("B &uuml; &lt;");
+    CPPUNIT_ASSERT_EQUAL(aExpectedCdata, xReader->m_aCdata);
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */