summaryrefslogtreecommitdiffstats
path: root/svtools/source/svhtml
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:06:44 +0000
commited5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch)
tree7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /svtools/source/svhtml
parentInitial commit. (diff)
downloadlibreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz
libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'svtools/source/svhtml')
-rw-r--r--svtools/source/svhtml/HtmlWriter.cxx176
-rw-r--r--svtools/source/svhtml/htmlkywd.cxx816
-rw-r--r--svtools/source/svhtml/htmlout.cxx1007
-rw-r--r--svtools/source/svhtml/htmlsupp.cxx159
-rw-r--r--svtools/source/svhtml/parhtml.cxx2200
5 files changed, 4358 insertions, 0 deletions
diff --git a/svtools/source/svhtml/HtmlWriter.cxx b/svtools/source/svhtml/HtmlWriter.cxx
new file mode 100644
index 000000000..b813c7ee5
--- /dev/null
+++ b/svtools/source/svhtml/HtmlWriter.cxx
@@ -0,0 +1,176 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <svtools/HtmlWriter.hxx>
+#include <tools/stream.hxx>
+#include <sal/log.hxx>
+#include <svtools/htmlout.hxx>
+
+/**
+ * Creates a writer that emits markup to rStream.
+ *
+ * Pretty-printing starts out enabled. A non-empty rNamespace is stored with a
+ * trailing ':' and is prepended to every element name written by start() and
+ * end().
+ */
+HtmlWriter::HtmlWriter(SvStream& rStream, std::string_view rNamespace)
+    : mrStream(rStream)
+    , mbElementOpen(false)
+    , mbCharactersWritten(false)
+    , mbPrettyPrint(true)
+{
+    if (rNamespace.empty())
+        return;
+
+    // Turn the namespace alias into a ready-to-use "alias:" prefix.
+    maNamespace = OString::Concat(rNamespace) + ":";
+}
+
+/// Nothing to release explicitly; open elements are NOT flushed here.
+HtmlWriter::~HtmlWriter() = default;
+
+/// Switches pretty-printing (line breaks and indentation in the output) on or off.
+void HtmlWriter::prettyPrint(bool b)
+{
+    mbPrettyPrint = b;
+}
+
+/**
+ * Opens a new element and pushes it onto the element stack.
+ *
+ * If a previous opening tag is still unterminated it is closed with '>'
+ * first. The new tag itself is left open (no '>' is written) so that
+ * attributes can still be appended to it.
+ *
+ * @param aElement element name; the namespace prefix, if any, is prepended
+ */
+void HtmlWriter::start(const OString& aElement)
+{
+    // Terminate the parent's opening tag before nesting a child inside it.
+    if (mbElementOpen)
+    {
+        mrStream.WriteChar('>');
+        if (mbPrettyPrint)
+            mrStream.WriteChar('\n');
+    }
+
+    maElementStack.push_back(aElement);
+
+    if (mbPrettyPrint)
+    {
+        // One indentation step per enclosing element; the element that was
+        // just pushed does not count towards its own depth.
+        const size_t nDepth = maElementStack.size() - 1;
+        for (size_t nLevel = 0; nLevel < nDepth; ++nLevel)
+            mrStream.WriteCharPtr(" ");
+    }
+
+    mrStream.WriteChar('<');
+    mrStream.WriteOString(OStringConcatenation(maNamespace + aElement));
+    mbElementOpen = true;
+}
+
+/// Writes an element without content: start(aContent) immediately followed by end().
+void HtmlWriter::single(const OString &aContent)
+{
+    start(aContent);
+    end();
+}
+
+/**
+ * Terminates a still-open tag as self-closing ("/>").
+ *
+ * Does nothing when no opening tag is pending. The element stack is not
+ * modified by this function.
+ */
+void HtmlWriter::endAttribute()
+{
+    if (!mbElementOpen)
+        return;
+
+    mrStream.WriteCharPtr("/>");
+    if (mbPrettyPrint)
+        mrStream.WriteCharPtr("\n");
+    mbElementOpen = false;
+}
+
+/// Closes every element that is still on the element stack, innermost first.
+void HtmlWriter::flushStack()
+{
+    // end() pops one element per call, so this terminates once the stack is empty.
+    while (!maElementStack.empty())
+        end();
+}
+
+/**
+ * Closes the innermost open element, checking that it is the expected one.
+ *
+ * @param aElement name the caller believes is on top of the element stack
+ * @return true if aElement matched the top of the stack; false on a mismatch
+ *         (the top element is closed regardless) or when no element is open
+ *         (in which case nothing is written).
+ */
+bool HtmlWriter::end(const OString& aElement)
+{
+    // back() on an empty stack is undefined behaviour, so refuse early.
+    if (maElementStack.empty())
+    {
+        SAL_WARN("svtools", "HtmlWriter: end element '" << aElement << "' requested but no element is open");
+        return false;
+    }
+
+    bool bExpected = maElementStack.back() == aElement;
+    SAL_WARN_IF(!bExpected, "svtools", "HtmlWriter: end element mismatch - '" << aElement << "' expected '" << maElementStack.back() << "'");
+    end();
+    return bExpected;
+}
+
+/**
+ * Closes the innermost open element and pops it from the element stack.
+ *
+ * An element whose opening tag is still pending and that received no
+ * character data is written as self-closing ("/>"). Otherwise an explicit
+ * closing tag is written, indented when pretty-printing is enabled.
+ *
+ * Calling this with no open element logs a warning and does nothing.
+ */
+void HtmlWriter::end()
+{
+    // Without this guard an empty stack would make "size() - 1" wrap around
+    // in the indentation loop and back()/pop_back() would be undefined.
+    if (maElementStack.empty())
+    {
+        SAL_WARN("svtools", "HtmlWriter: end() called but no element is open");
+        return;
+    }
+
+    if (mbElementOpen && !mbCharactersWritten)
+    {
+        // Nothing was written inside the element: collapse it to "<x/>".
+        mrStream.WriteCharPtr("/>");
+        if (mbPrettyPrint)
+            mrStream.WriteCharPtr("\n");
+    }
+    else
+    {
+        if (mbPrettyPrint)
+        {
+            // Indent the closing tag to the same depth as its opening tag.
+            const size_t nDepth = maElementStack.size() - 1;
+            for (size_t i = 0; i < nDepth; i++)
+            {
+                mrStream.WriteCharPtr(" ");
+            }
+        }
+        mrStream.WriteCharPtr("</");
+        mrStream.WriteOString(OStringConcatenation(maNamespace + maElementStack.back()));
+        mrStream.WriteCharPtr(">");
+        if (mbPrettyPrint)
+            mrStream.WriteCharPtr("\n");
+    }
+    maElementStack.pop_back();
+    mbElementOpen = false;
+    mbCharactersWritten = false;
+}
+
+/// Integer convenience overload: formats aValue as a decimal string and
+/// forwards to the string_view overload of writeAttribute().
+void HtmlWriter::writeAttribute(SvStream& rStream, std::string_view aAttribute, sal_Int32 aValue)
+{
+    const OString aNumber = OString::number(aValue);
+    writeAttribute(rStream, aAttribute, aNumber);
+}
+
+/**
+ * Writes `aAttribute="aValue"` to rStream (no leading space).
+ *
+ * The attribute name is written verbatim. The value is interpreted as UTF-8
+ * and emitted through HTMLOutFuncs::Out_String.
+ * NOTE(review): Out_String is presumably what escapes HTML-special
+ * characters in the value - confirm against htmlout.cxx.
+ */
+void HtmlWriter::writeAttribute(SvStream& rStream, std::string_view aAttribute, std::string_view aValue)
+{
+    rStream.WriteOString(aAttribute);
+    rStream.WriteChar('=');
+    rStream.WriteChar('"');
+
+    const OUString aUnicodeValue = OStringToOUString(aValue, RTL_TEXTENCODING_UTF8);
+    HTMLOutFuncs::Out_String(rStream, aUnicodeValue);
+
+    rStream.WriteChar('"');
+}
+
+/**
+ * Appends ` aAttribute="aValue"` to the currently open tag.
+ *
+ * The call is silently ignored when no opening tag is pending, or when
+ * either the attribute name or the value is empty.
+ */
+void HtmlWriter::attribute(std::string_view aAttribute, std::string_view aValue)
+{
+    if (!mbElementOpen || aAttribute.empty() || aValue.empty())
+        return;
+
+    mrStream.WriteChar(' ');
+    writeAttribute(mrStream, aAttribute, aValue);
+}
+
+/// Integer convenience overload: formats aValue as a decimal string and
+/// forwards to the string_view overload of attribute().
+void HtmlWriter::attribute(std::string_view aAttribute, const sal_Int32 aValue)
+{
+    const OString aNumber = OString::number(aValue);
+    attribute(aAttribute, aNumber);
+}
+
+/**
+ * C-string convenience overload of attribute().
+ *
+ * @param pValue NUL-terminated value, or nullptr. A null pointer is treated
+ *               like an empty value, i.e. no attribute is written.
+ */
+void HtmlWriter::attribute(std::string_view aAttribute, const char* pValue)
+{
+    // Constructing a std::string_view from a null pointer is undefined
+    // behaviour. The string_view overload ignores empty values anyway, so
+    // dropping a null value here is consistent with it.
+    if (!pValue)
+        return;
+
+    attribute(aAttribute, std::string_view(pValue));
+}
+
+/// UTF-16 convenience overload: converts aValue to UTF-8 and forwards to the
+/// string_view overload of attribute().
+void HtmlWriter::attribute(std::string_view aAttribute, std::u16string_view aValue)
+{
+    const OString aUtf8Value = OUStringToOString(aValue, RTL_TEXTENCODING_UTF8);
+    attribute(aAttribute, aUtf8Value);
+}
+
+/**
+ * Appends a value-less attribute (` aAttribute`) to the currently open tag.
+ *
+ * Ignored when no opening tag is pending or when the name is empty. The
+ * name is written verbatim.
+ */
+void HtmlWriter::attribute(std::string_view aAttribute)
+{
+    if (!mbElementOpen || aAttribute.empty())
+        return;
+
+    mrStream.WriteChar(' ');
+    mrStream.WriteOString(aAttribute);
+}
+
+/**
+ * Writes character data to the stream.
+ *
+ * If no character data has been written since end() last reset the flag, a
+ * '>' is emitted first to terminate the pending opening tag. rChars itself
+ * is written verbatim; no escaping is applied here.
+ *
+ * NOTE(review): mbElementOpen is neither checked nor cleared here, so the
+ * '>' is written even when no tag is pending, and a following start() will
+ * write another '>' - confirm callers never rely on that sequence.
+ */
+void HtmlWriter::characters(std::string_view rChars)
+{
+    if (!mbCharactersWritten)
+    {
+        mrStream.WriteCharPtr(">");
+        mbCharactersWritten = true;
+    }
+    mrStream.WriteOString(rChars);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
new file mode 100644
index 000000000..584322fac
--- /dev/null
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -0,0 +1,816 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <algorithm>
+#include <string_view>
+
+#include <o3tl/string_view.hxx>
+#include <sal/types.h>
+#include <rtl/ustring.hxx>
+#include <svtools/htmltokn.h>
+#include <svtools/htmlkywd.hxx>
+
+// If this is odd, then getOnToken() breaks.
+static_assert(static_cast<sal_Int16>(HtmlTokenId::ABBREVIATION_ON) % 2 == 0);
+
+namespace {
+
+/// One row of a keyword lookup table: a name and the value it maps to.
+/// Kept as a plain aggregate so the tables can be brace-initialised.
+template<typename T>
+struct TokenEntry
+{
+    std::u16string_view sToken; ///< lookup key; the tables are ordered by this field
+    T nToken;                   ///< value returned when the key matches
+};
+
+}
+
+/// Strict-weak ordering of two table rows by their key string.
+template<typename T>
+static bool sortCompare(const TokenEntry<T> & lhs, const TokenEntry<T> & rhs)
+{
+    return lhs.sToken.compare(rhs.sToken) < 0;
+}
+/// Heterogeneous comparison for std::lower_bound: is the row's key ordered
+/// before the searched-for string?
+template<typename T>
+static bool findCompare(const TokenEntry<T> & lhs, std::u16string_view rhs)
+{
+    return lhs.sToken.compare(rhs) < 0;
+}
+/**
+ * Binary search for key in a table that is sorted by sToken.
+ *
+ * @param dataTable     table to search; must be ordered by sToken
+ * @param key           exact string to look up
+ * @param notFoundValue returned when no row has exactly this key
+ * @return nToken of the matching row, or notFoundValue
+ */
+template<typename T, size_t LEN>
+static T search(TokenEntry<T> const (&dataTable)[LEN], std::u16string_view key, T notFoundValue)
+{
+    const auto itEnd = std::end(dataTable);
+    const auto itCandidate = std::lower_bound( std::begin(dataTable), itEnd,
+                                               key, findCompare<T> );
+
+    // lower_bound only yields the first row that is not ordered before key;
+    // it still has to be an exact match.
+    if (itCandidate == itEnd || !(key == itCandidate->sToken))
+        return notFoundValue;
+    return itCandidate->nToken;
+}
+
+using HTML_TokenEntry = TokenEntry<HtmlTokenId>;
+
+// This array must stay sorted by the token string (sToken), not by the macro or enum names: GetHTMLToken() binary-searches it.
+HTML_TokenEntry const aHTMLTokenTab[] = {
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_comment), HtmlTokenId::COMMENT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_doctype), HtmlTokenId::DOCTYPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_cdata), HtmlTokenId::CDATA},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_anchor), HtmlTokenId::ANCHOR_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_abbreviation), HtmlTokenId::ABBREVIATION_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_acronym), HtmlTokenId::ACRONYM_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_address), HtmlTokenId::ADDRESS_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_applet), HtmlTokenId::APPLET_ON}, // HotJava
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_area), HtmlTokenId::AREA}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_author), HtmlTokenId::AUTHOR_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_bold), HtmlTokenId::BOLD_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_banner), HtmlTokenId::BANNER_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_base), HtmlTokenId::BASE}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_basefont), HtmlTokenId::BASEFONT_ON}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_bigprint), HtmlTokenId::BIGPRINT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_blink), HtmlTokenId::BLINK_ON}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_blockquote), HtmlTokenId::BLOCKQUOTE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_body), HtmlTokenId::BODY_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_blockquote30), HtmlTokenId::BLOCKQUOTE30_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_linebreak), HtmlTokenId::LINEBREAK},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_caption), HtmlTokenId::CAPTION_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_center), HtmlTokenId::CENTER_ON}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_citation), HtmlTokenId::CITATION_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_code), HtmlTokenId::CODE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_col), HtmlTokenId::COL_ON}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_colgroup), HtmlTokenId::COLGROUP_ON}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_comment2), HtmlTokenId::COMMENT2_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_credit), HtmlTokenId::CREDIT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_dd), HtmlTokenId::DD_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_deletedtext), HtmlTokenId::DELETEDTEXT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_definstance), HtmlTokenId::DEFINSTANCE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_dirlist), HtmlTokenId::DIRLIST_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_division), HtmlTokenId::DIVISION_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_deflist), HtmlTokenId::DEFLIST_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_dt), HtmlTokenId::DT_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_emphasis), HtmlTokenId::EMPHASIS_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_embed), HtmlTokenId::EMBED}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_figure), HtmlTokenId::FIGURE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_footnote), HtmlTokenId::FOOTNOTE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_font), HtmlTokenId::FONT_ON}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_form), HtmlTokenId::FORM_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_frame), HtmlTokenId::FRAME_ON}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_frameset), HtmlTokenId::FRAMESET_ON}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head1), HtmlTokenId::HEAD1_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head2), HtmlTokenId::HEAD2_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head3), HtmlTokenId::HEAD3_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head4), HtmlTokenId::HEAD4_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head5), HtmlTokenId::HEAD5_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head6), HtmlTokenId::HEAD6_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head), HtmlTokenId::HEAD_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_horzrule), HtmlTokenId::HORZRULE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_html), HtmlTokenId::HTML_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_italic), HtmlTokenId::ITALIC_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_iframe), HtmlTokenId::IFRAME_ON}, // IE 3.0b2
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_image), HtmlTokenId::IMAGE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_input), HtmlTokenId::INPUT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_insertedtext), HtmlTokenId::INSERTEDTEXT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_keyboard), HtmlTokenId::KEYBOARD_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_language), HtmlTokenId::LANGUAGE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_listheader), HtmlTokenId::LISTHEADER_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_li), HtmlTokenId::LI_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_link), HtmlTokenId::LINK}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_listing), HtmlTokenId::LISTING_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_map), HtmlTokenId::MAP_ON}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_marquee), HtmlTokenId::MARQUEE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_menulist), HtmlTokenId::MENULIST_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_meta), HtmlTokenId::META}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_multicol), HtmlTokenId::MULTICOL_ON}, // Netscape 3.0b5
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_nobr), HtmlTokenId::NOBR_ON}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noembed), HtmlTokenId::NOEMBED_ON}, // Netscape 2.0 ???
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noframe), HtmlTokenId::NOFRAMES_ON}, // Netscape 2.0 ???
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noframes), HtmlTokenId::NOFRAMES_ON}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noscript), HtmlTokenId::NOSCRIPT_ON}, // Netscape 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_note), HtmlTokenId::NOTE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_object), HtmlTokenId::OBJECT_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_orderlist), HtmlTokenId::ORDERLIST_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_option), HtmlTokenId::OPTION},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_parabreak), HtmlTokenId::PARABREAK_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_param), HtmlTokenId::PARAM}, // HotJava
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_person), HtmlTokenId::PERSON_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_plaintext2), HtmlTokenId::PLAINTEXT2_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_preformtxt), HtmlTokenId::PREFORMTXT_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_shortquote), HtmlTokenId::SHORTQUOTE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_strikethrough), HtmlTokenId::STRIKETHROUGH_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_sample), HtmlTokenId::SAMPLE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_script), HtmlTokenId::SCRIPT_ON}, // HTML 3.2
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_sdfield), HtmlTokenId::SDFIELD_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_select), HtmlTokenId::SELECT_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_smallprint), HtmlTokenId::SMALLPRINT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_spacer), HtmlTokenId::SPACER}, // Netscape 3.0b5
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_span), HtmlTokenId::SPAN_ON}, // Style Sheets
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_strike), HtmlTokenId::STRIKE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_strong), HtmlTokenId::STRONG_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_style), HtmlTokenId::STYLE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_subscript), HtmlTokenId::SUBSCRIPT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_superscript), HtmlTokenId::SUPERSCRIPT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_plaintext), HtmlTokenId::PLAINTEXT_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_table), HtmlTokenId::TABLE_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tbody), HtmlTokenId::TBODY_ON}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tabledata), HtmlTokenId::TABLEDATA_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_textarea), HtmlTokenId::TEXTAREA_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tfoot), HtmlTokenId::TFOOT_ON}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tableheader), HtmlTokenId::TABLEHEADER_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_thead), HtmlTokenId::THEAD_ON}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_title), HtmlTokenId::TITLE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tablerow), HtmlTokenId::TABLEROW_ON}, // HTML 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_teletype), HtmlTokenId::TELETYPE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_underline), HtmlTokenId::UNDERLINE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_unorderlist), HtmlTokenId::UNORDERLIST_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_variable), HtmlTokenId::VARIABLE_ON},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_xmp), HtmlTokenId::XMP_ON},
+};
+
+
+/**
+ * Maps an HTML element name to its token id.
+ *
+ * Any name that begins with the comment marker is reported as
+ * HtmlTokenId::COMMENT; every other name is looked up by exact match in
+ * aHTMLTokenTab. Unknown names yield HtmlTokenId::NONE.
+ */
+HtmlTokenId GetHTMLToken( std::u16string_view rName )
+{
+    // The lookup below is a binary search, so verify (once, and only in
+    // builds where assert is active) that the table really is sorted.
+    static bool bTableChecked = false;
+    if( !bTableChecked )
+    {
+        assert( std::is_sorted( std::begin(aHTMLTokenTab), std::end(aHTMLTokenTab), sortCompare<HtmlTokenId> ) );
+        bTableChecked = true;
+    }
+
+    const bool bIsComment = o3tl::starts_with( rName, u"" OOO_STRING_SVTOOLS_HTML_comment );
+    return bIsComment
+        ? HtmlTokenId::COMMENT
+        : search( aHTMLTokenTab, rName, HtmlTokenId::NONE);
+}
+
+using HTML_CharEntry = TokenEntry<sal_Unicode>;
+
+// Flag: HTML character name table has already been sorted
+static bool bSortCharKeyWords = false;
+
+static HTML_CharEntry aHTMLCharNameTab[] = {
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_lt), 60},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_gt), 62},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_amp), 38},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_quot), 34},
+
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Agrave), 192},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Aacute), 193},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Acirc), 194},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Atilde), 195},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Auml), 196},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Aring), 197},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_AElig), 198},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ccedil), 199},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Egrave), 200},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Eacute), 201},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ecirc), 202},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Euml), 203},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Igrave), 204},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Iacute), 205},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Icirc), 206},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Iuml), 207},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_ETH), 208},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ntilde), 209},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ograve), 210},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Oacute), 211},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ocirc), 212},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Otilde), 213},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ouml), 214},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Oslash), 216},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ugrave), 217},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Uacute), 218},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ucirc), 219},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Uuml), 220},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Yacute), 221},
+
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_THORN), 222},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_szlig), 223},
+
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_agrave), 224},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_aacute), 225},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_acirc), 226},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_atilde), 227},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_auml), 228},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_aring), 229},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_aelig), 230},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ccedil), 231},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_egrave), 232},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_eacute), 233},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ecirc), 234},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_euml), 235},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_igrave), 236},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iacute), 237},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_icirc), 238},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iuml), 239},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_eth), 240},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ntilde), 241},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ograve), 242},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oacute), 243},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ocirc), 244},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_otilde), 245},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ouml), 246},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oslash), 248},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ugrave), 249},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uacute), 250},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ucirc), 251},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uuml), 252},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_yacute), 253},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_thorn), 254},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_yuml), 255},
+
+// special characters
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_acute), 180},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_brvbar), 166},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cedil), 184},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cent), 162},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_copy), 169},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_curren), 164},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_deg), 176},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_divide), 247},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frac12), 189},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frac14), 188},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frac34), 190},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iexcl), 161},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iquest), 191},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_laquo), 171},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_macr), 175},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_micro), 181},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_middot), 183},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_not), 172},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ordf), 170},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ordm), 186},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_para), 182},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_plusmn), 177},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_pound), 163},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_raquo), 187},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_reg), 174},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sect), 167},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup1), 185},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup2), 178},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup3), 179},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_times), 215},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uml), 168},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_yen), 165},
+
+// special characters, which will be converted to tokens !!!
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nbsp), 1},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_shy), 2},
+
+
+// HTML4
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_OElig), 338},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oelig), 339},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Scaron), 352},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_scaron), 353},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Yuml), 376},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_fnof), 402},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_circ), 710},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_tilde), 732},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Alpha), 913},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Beta), 914},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Gamma), 915},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Delta), 916},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Epsilon), 917},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Zeta), 918},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Eta), 919},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Theta), 920},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Iota), 921},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Kappa), 922},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Lambda), 923},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Mu), 924},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Nu), 925},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Xi), 926},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Omicron), 927},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Pi), 928},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Rho), 929},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Sigma), 931},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Tau), 932},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Upsilon), 933},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Phi), 934},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Chi), 935},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Psi), 936},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Omega), 937},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_alpha), 945},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_beta), 946},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_gamma), 947},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_delta), 948},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_epsilon), 949},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_zeta), 950},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_eta), 951},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_theta), 952},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iota), 953},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_kappa), 954},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lambda), 955},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_mu), 956},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nu), 957},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_xi), 958},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_omicron), 959},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_pi), 960},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rho), 961},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sigmaf), 962},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sigma), 963},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_tau), 964},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_upsilon), 965},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_phi), 966},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_chi), 967},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_psi), 968},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_omega), 969},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_thetasym), 977},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_upsih), 978},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_piv), 982},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ensp), 8194},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_emsp), 8195},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_thinsp), 8201},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_zwnj), 8204},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_zwj), 8205},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lrm), 8206},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rlm), 8207},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ndash), 8211},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_mdash), 8212},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lsquo), 8216},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rsquo), 8217},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sbquo), 8218},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ldquo), 8220},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rdquo), 8221},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_bdquo), 8222},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_dagger), 8224},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Dagger), 8225},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_bull), 8226},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_hellip), 8230},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_permil), 8240},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_prime), 8242},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Prime), 8243},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lsaquo), 8249},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rsaquo), 8250},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oline), 8254},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frasl), 8260},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_euro), 8364},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_image), 8465},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_weierp), 8472},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_real), 8476},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_trade), 8482},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_alefsym), 8501},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_larr), 8592},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uarr), 8593},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rarr), 8594},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_darr), 8595},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_harr), 8596},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_crarr), 8629},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lArr), 8656},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uArr), 8657},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rArr), 8658},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_dArr), 8659},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_hArr), 8660},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_forall), 8704},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_part), 8706},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_exist), 8707},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_empty), 8709},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nabla), 8711},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_isin), 8712},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_notin), 8713},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ni), 8715},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_prod), 8719},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sum), 8721},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_minus), 8722},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lowast), 8727},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_radic), 8730},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_prop), 8733},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_infin), 8734},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ang), 8736},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_and), 8743},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_or), 8744},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cap), 8745},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cup), 8746},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_int), 8747},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_there4), 8756},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sim), 8764},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cong), 8773},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_asymp), 8776},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ne), 8800},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_equiv), 8801},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_le), 8804},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ge), 8805},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sub), 8834},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup), 8835},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nsub), 8836},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sube), 8838},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_supe), 8839},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oplus), 8853},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_otimes), 8855},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_perp), 8869},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sdot), 8901},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lceil), 8968},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rceil), 8969},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lfloor), 8970},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rfloor), 8971},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lang), 9001},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rang), 9002},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_loz), 9674},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_spades), 9824},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_clubs), 9827},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_hearts), 9829},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_diams), 9830}
+};
+
+// Looks up the character code registered for the HTML character entity
+// name rName in aHTMLCharNameTab. The literal 0 is handed to search() as
+// its third argument (presumably the value reported for unknown names).
+sal_Unicode GetHTMLCharName( std::u16string_view rName )
+{
+    // First call only: bring the table into sortCompare order before the
+    // lookup below runs; the flag keeps later calls from sorting again.
+    if( !bSortCharKeyWords )
+    {
+        auto const pTabBegin = std::begin(aHTMLCharNameTab);
+        auto const pTabEnd = std::end(aHTMLCharNameTab);
+        std::sort( pTabBegin, pTabEnd, sortCompare<sal_Unicode> );
+        bSortCharKeyWords = true;
+    }
+
+    sal_Unicode const cChar = search<sal_Unicode>( aHTMLCharNameTab, rName, 0 );
+    return cChar;
+}
+
+// Flag: Options table has already been sorted.
+// Set by GetHTMLOption() after it has sorted aHTMLOptionTab, so that the
+// sort is performed on the first lookup only.
+static bool bSortOptionKeyWords = false;
+
+// Entry type of the options table: an option name paired with its HtmlOptionId.
+using HTML_OptionEntry = TokenEntry<HtmlOptionId>;
+
+// Table of the known HTML options (attributes): each entry pairs the
+// attribute name with its HtmlOptionId. The entries are grouped below by
+// the kind of value the attribute takes. The table is deliberately not
+// const: GetHTMLOption() sorts it in place on first use, so the order of
+// the entries here does not have to match the sort order.
+static HTML_OptionEntry aHTMLOptionTab[] = {
+
+// Attributes without value
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_checked), HtmlOptionId::CHECKED},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_compact), HtmlOptionId::COMPACT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_declare), HtmlOptionId::DECLARE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_disabled), HtmlOptionId::DISABLED},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_ismap), HtmlOptionId::ISMAP},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_mayscript), HtmlOptionId::MAYSCRIPT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_multiple), HtmlOptionId::MULTIPLE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_nohref), HtmlOptionId::NOHREF}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_noresize), HtmlOptionId::NORESIZE}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_noshade), HtmlOptionId::NOSHADE}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_nowrap), HtmlOptionId::NOWRAP},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdfixed), HtmlOptionId::SDFIXED},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_selected), HtmlOptionId::SELECTED},
+
+// Attributes with a string value
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_accept), HtmlOptionId::ACCEPT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_accesskey), HtmlOptionId::ACCESSKEY},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_alt), HtmlOptionId::ALT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_axis), HtmlOptionId::AXIS},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_char), HtmlOptionId::CHAR}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_charset), HtmlOptionId::CHARSET},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_class), HtmlOptionId::CLASS},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_code), HtmlOptionId::CODE}, // HotJava
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_codetype), HtmlOptionId::CODETYPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_content), HtmlOptionId::CONTENT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_coords), HtmlOptionId::COORDS}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_enctype), HtmlOptionId::ENCTYPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_face), HtmlOptionId::FACE}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_frameborder), HtmlOptionId::FRAMEBORDER}, // IExplorer 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_httpequiv), HtmlOptionId::HTTPEQUIV},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_language), HtmlOptionId::LANGUAGE}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_name), HtmlOptionId::NAME},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_prompt), HtmlOptionId::PROMPT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_shape), HtmlOptionId::SHAPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_standby), HtmlOptionId::STANDBY},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_style), HtmlOptionId::STYLE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_title), HtmlOptionId::TITLE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_value), HtmlOptionId::VALUE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDval), HtmlOptionId::SDVAL}, // StarDiv NumberValue
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum), HtmlOptionId::SDNUM}, // StarDiv NumberFormat
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), HtmlOptionId::SDLIBRARY},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule), HtmlOptionId::SDMODULE},
+
+// Attributes with a SGML identifier value
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_id), HtmlOptionId::ID},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_target), HtmlOptionId::TARGET}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_to), HtmlOptionId::TO},
+
+// Attributes with a URI value
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_action), HtmlOptionId::ACTION},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_archive), HtmlOptionId::ARCHIVE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_background), HtmlOptionId::BACKGROUND},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_classid), HtmlOptionId::CLASSID},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_codebase), HtmlOptionId::CODEBASE}, // HotJava
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_data), HtmlOptionId::DATA},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_href), HtmlOptionId::HREF},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_script), HtmlOptionId::SCRIPT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_src), HtmlOptionId::SRC},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_usemap), HtmlOptionId::USEMAP}, // Netscape 2.0
+
+// Attributes with a color value (all Netscape versions)
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_alink), HtmlOptionId::ALINK},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bgcolor), HtmlOptionId::BGCOLOR},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bordercolor), HtmlOptionId::BORDERCOLOR}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bordercolorlight), HtmlOptionId::BORDERCOLORLIGHT}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bordercolordark), HtmlOptionId::BORDERCOLORDARK}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_color), HtmlOptionId::COLOR},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_link), HtmlOptionId::LINK},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_text), HtmlOptionId::TEXT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_vlink), HtmlOptionId::VLINK},
+
+// Attributes with a numerical value
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_border), HtmlOptionId::BORDER},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_cellspacing),HtmlOptionId::CELLSPACING}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_cellpadding),HtmlOptionId::CELLPADDING}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_charoff), HtmlOptionId::CHAROFF}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_colspan), HtmlOptionId::COLSPAN},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_framespacing), HtmlOptionId::FRAMESPACING}, // IExplorer 3.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_gutter), HtmlOptionId::GUTTER}, // Netscape 3.0b5
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_height), HtmlOptionId::HEIGHT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_hspace), HtmlOptionId::HSPACE}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_left), HtmlOptionId::LEFT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_loop), HtmlOptionId::LOOP}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_marginheight),HtmlOptionId::MARGINHEIGHT}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_marginwidth),HtmlOptionId::MARGINWIDTH}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_maxlength), HtmlOptionId::MAXLENGTH},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rowspan), HtmlOptionId::ROWSPAN},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_scrollamount), HtmlOptionId::SCROLLAMOUNT}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_scrolldelay), HtmlOptionId::SCROLLDELAY}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_span), HtmlOptionId::SPAN}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_tabindex), HtmlOptionId::TABINDEX},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_vspace), HtmlOptionId::VSPACE}, // Netscape
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_width), HtmlOptionId::WIDTH},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_zindex), HtmlOptionId::ZINDEX},
+
+// Attributes with enum values
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_behavior), HtmlOptionId::BEHAVIOR}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_clear), HtmlOptionId::CLEAR},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_dir), HtmlOptionId::DIR}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_direction), HtmlOptionId::DIRECTION}, // IExplorer 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_format), HtmlOptionId::FORMAT},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_frame), HtmlOptionId::FRAME}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_lang), HtmlOptionId::LANG},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_method), HtmlOptionId::METHOD},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rel), HtmlOptionId::REL},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rev), HtmlOptionId::REV},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rules), HtmlOptionId::RULES}, // HTML 3 Table Model Draft
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_scrolling), HtmlOptionId::SCROLLING}, // Netscape 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdreadonly), HtmlOptionId::SDREADONLY},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_subtype), HtmlOptionId::SUBTYPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_type), HtmlOptionId::TYPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valign), HtmlOptionId::VALIGN},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valuetype), HtmlOptionId::VALUETYPE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_wrap), HtmlOptionId::WRAP},
+
+// Attributes with script code value
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onblur), HtmlOptionId::ONBLUR}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onchange), HtmlOptionId::ONCHANGE}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onclick), HtmlOptionId::ONCLICK}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onfocus), HtmlOptionId::ONFOCUS}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onload), HtmlOptionId::ONLOAD}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onmouseover), HtmlOptionId::ONMOUSEOVER}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onreset), HtmlOptionId::ONRESET}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onselect), HtmlOptionId::ONSELECT}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onsubmit), HtmlOptionId::ONSUBMIT}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onunload), HtmlOptionId::ONUNLOAD}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onabort), HtmlOptionId::ONABORT}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onerror), HtmlOptionId::ONERROR}, // JavaScript
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onmouseout), HtmlOptionId::ONMOUSEOUT}, // JavaScript
+
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonblur), HtmlOptionId::SDONBLUR}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonchange), HtmlOptionId::SDONCHANGE}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonclick), HtmlOptionId::SDONCLICK}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonfocus), HtmlOptionId::SDONFOCUS}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonload), HtmlOptionId::SDONLOAD}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonmouseover), HtmlOptionId::SDONMOUSEOVER}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonreset), HtmlOptionId::SDONRESET}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonselect), HtmlOptionId::SDONSELECT}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonsubmit), HtmlOptionId::SDONSUBMIT}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonunload), HtmlOptionId::SDONUNLOAD}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonabort), HtmlOptionId::SDONABORT}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonerror), HtmlOptionId::SDONERROR}, // StarBasic
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonmouseout), HtmlOptionId::SDONMOUSEOUT}, // StarBasic
+
+// Attributes with context sensitive values
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_align), HtmlOptionId::ALIGN},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_cols), HtmlOptionId::COLS}, // Netscape 2.0 vs HTML 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rows), HtmlOptionId::ROWS}, // Netscape 2.0 vs HTML 2.0
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_size), HtmlOptionId::SIZE},
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_start), HtmlOptionId::START}, // Netscape 2.0 vs IExplorer 2.0
+};
+
+// Maps the option (attribute) name rName to its HtmlOptionId via
+// aHTMLOptionTab. HtmlOptionId::UNKNOWN is handed to search() as its third
+// argument (presumably the value reported for names not in the table).
+HtmlOptionId GetHTMLOption( std::u16string_view rName )
+{
+    // First call only: sort the table in place using sortCompare; the
+    // flag prevents the sort from being repeated on later calls.
+    if( !bSortOptionKeyWords )
+    {
+        auto const pTabBegin = std::begin(aHTMLOptionTab);
+        auto const pTabEnd = std::end(aHTMLOptionTab);
+        std::sort( pTabBegin, pTabEnd, sortCompare<HtmlOptionId> );
+        bSortOptionKeyWords = true;
+    }
+
+    HtmlOptionId const eOption = search( aHTMLOptionTab, rName, HtmlOptionId::UNKNOWN );
+    return eOption;
+}
+
+
+// Entry type of the color table: a color name paired with a 32-bit color value.
+using HTML_ColorEntry = TokenEntry<sal_uInt32>;
+
+// Flag: color table has already been sorted.
+// Set by GetHTMLColor() after its first call; it only guards the one-time
+// assert that the (const) color table is in sorted order.
+static bool bSortColorKeyWords = false;
+
+// Color names are not exported (source:
+// "http://www.uio.no/~mnbjerke/colors_w.html");
+// "http://www.infi.net/wwwimages/colorindex.html" seems to be buggy.
+//
+// Each entry pairs a color name with its value in 0x00RRGGBB form (compare
+// "red", "lime" and "blue" below). The table is const and is never sorted
+// at runtime: GetHTMLColor() only asserts that it is already in
+// sortCompare order, so new entries must be inserted at their sorted
+// position. Names are written in lower case because GetHTMLColor()
+// lower-cases the requested name before searching.
+HTML_ColorEntry const aHTMLColorNameTab[] = {
+ { std::u16string_view(u"aliceblue"), 0x00f0f8ffUL },
+ { std::u16string_view(u"antiquewhite"), 0x00faebd7UL },
+ { std::u16string_view(u"aqua"), 0x0000ffffUL },
+ { std::u16string_view(u"aquamarine"), 0x007fffd4UL },
+ { std::u16string_view(u"azure"), 0x00f0ffffUL },
+ { std::u16string_view(u"beige"), 0x00f5f5dcUL },
+ { std::u16string_view(u"bisque"), 0x00ffe4c4UL },
+ { std::u16string_view(u"black"), 0x00000000UL },
+ { std::u16string_view(u"blanchedalmond"), 0x00ffebcdUL },
+ { std::u16string_view(u"blue"), 0x000000ffUL },
+ { std::u16string_view(u"blueviolet"), 0x008a2be2UL },
+ { std::u16string_view(u"brown"), 0x00a52a2aUL },
+ { std::u16string_view(u"burlywood"), 0x00deb887UL },
+ { std::u16string_view(u"cadetblue"), 0x005f9ea0UL },
+ { std::u16string_view(u"chartreuse"), 0x007fff00UL },
+ { std::u16string_view(u"chocolate"), 0x00d2691eUL },
+ { std::u16string_view(u"coral"), 0x00ff7f50UL },
+ { std::u16string_view(u"cornflowerblue"), 0x006495edUL },
+ { std::u16string_view(u"cornsilk"), 0x00fff8dcUL },
+ { std::u16string_view(u"crimson"), 0x00dc143cUL },
+ { std::u16string_view(u"cyan"), 0x0000ffffUL },
+ { std::u16string_view(u"darkblue"), 0x0000008bUL },
+ { std::u16string_view(u"darkcyan"), 0x00008b8bUL },
+ { std::u16string_view(u"darkgoldenrod"), 0x00b8860bUL },
+ { std::u16string_view(u"darkgray"), 0x00a9a9a9UL },
+ { std::u16string_view(u"darkgreen"), 0x00006400UL },
+ { std::u16string_view(u"darkkhaki"), 0x00bdb76bUL },
+ { std::u16string_view(u"darkmagenta"), 0x008b008bUL },
+ { std::u16string_view(u"darkolivegreen"), 0x00556b2fUL },
+ { std::u16string_view(u"darkorange"), 0x00ff8c00UL },
+ { std::u16string_view(u"darkorchid"), 0x009932ccUL },
+ { std::u16string_view(u"darkred"), 0x008b0000UL },
+ { std::u16string_view(u"darksalmon"), 0x00e9967aUL },
+ { std::u16string_view(u"darkseagreen"), 0x008fbc8fUL },
+ { std::u16string_view(u"darkslateblue"), 0x00483d8bUL },
+ { std::u16string_view(u"darkslategray"), 0x002f4f4fUL },
+ { std::u16string_view(u"darkturquoise"), 0x0000ced1UL },
+ { std::u16string_view(u"darkviolet"), 0x009400d3UL },
+ { std::u16string_view(u"deeppink"), 0x00ff1493UL },
+ { std::u16string_view(u"deepskyblue"), 0x0000bfffUL },
+ { std::u16string_view(u"dimgray"), 0x00696969UL },
+ { std::u16string_view(u"dodgerblue"), 0x001e90ffUL },
+ { std::u16string_view(u"firebrick"), 0x00b22222UL },
+ { std::u16string_view(u"floralwhite"), 0x00fffaf0UL },
+ { std::u16string_view(u"forestgreen"), 0x00228b22UL },
+ { std::u16string_view(u"fuchsia"), 0x00ff00ffUL },
+ { std::u16string_view(u"gainsboro"), 0x00dcdcdcUL },
+ { std::u16string_view(u"ghostwhite"), 0x00f8f8ffUL },
+ { std::u16string_view(u"gold"), 0x00ffd700UL },
+ { std::u16string_view(u"goldenrod"), 0x00daa520UL },
+ { std::u16string_view(u"gray"), 0x00808080UL },
+ { std::u16string_view(u"green"), 0x00008000UL },
+ { std::u16string_view(u"greenyellow"), 0x00adff2fUL },
+ { std::u16string_view(u"honeydew"), 0x00f0fff0UL },
+ { std::u16string_view(u"hotpink"), 0x00ff69b4UL },
+ { std::u16string_view(u"indianred"), 0x00cd5c5cUL },
+ { std::u16string_view(u"indigo"), 0x004b0082UL },
+ { std::u16string_view(u"ivory"), 0x00fffff0UL },
+ { std::u16string_view(u"khaki"), 0x00f0e68cUL },
+ { std::u16string_view(u"lavender"), 0x00e6e6faUL },
+ { std::u16string_view(u"lavenderblush"), 0x00fff0f5UL },
+ { std::u16string_view(u"lawngreen"), 0x007cfc00UL },
+ { std::u16string_view(u"lemonchiffon"), 0x00fffacdUL },
+ { std::u16string_view(u"lightblue"), 0x00add8e6UL },
+ { std::u16string_view(u"lightcoral"), 0x00f08080UL },
+ { std::u16string_view(u"lightcyan"), 0x00e0ffffUL },
+ { std::u16string_view(u"lightgoldenrodyellow"), 0x00fafad2UL },
+ { std::u16string_view(u"lightgreen"), 0x0090ee90UL },
+ { std::u16string_view(u"lightgrey"), 0x00d3d3d3UL },
+ { std::u16string_view(u"lightpink"), 0x00ffb6c1UL },
+ { std::u16string_view(u"lightsalmon"), 0x00ffa07aUL },
+ { std::u16string_view(u"lightseagreen"), 0x0020b2aaUL },
+ { std::u16string_view(u"lightskyblue"), 0x0087cefaUL },
+ { std::u16string_view(u"lightslategray"), 0x00778899UL },
+ { std::u16string_view(u"lightsteelblue"), 0x00b0c4deUL },
+ { std::u16string_view(u"lightyellow"), 0x00ffffe0UL },
+ { std::u16string_view(u"lime"), 0x0000ff00UL },
+ { std::u16string_view(u"limegreen"), 0x0032cd32UL },
+ { std::u16string_view(u"linen"), 0x00faf0e6UL },
+ { std::u16string_view(u"magenta"), 0x00ff00ffUL },
+ { std::u16string_view(u"maroon"), 0x00800000UL },
+ { std::u16string_view(u"mediumaquamarine"), 0x0066cdaaUL },
+ { std::u16string_view(u"mediumblue"), 0x000000cdUL },
+ { std::u16string_view(u"mediumorchid"), 0x00ba55d3UL },
+ { std::u16string_view(u"mediumpurple"), 0x009370dbUL },
+ { std::u16string_view(u"mediumseagreen"), 0x003cb371UL },
+ { std::u16string_view(u"mediumslateblue"), 0x007b68eeUL },
+ { std::u16string_view(u"mediumspringgreen"), 0x0000fa9aUL },
+ { std::u16string_view(u"mediumturquoise"), 0x0048d1ccUL },
+ { std::u16string_view(u"mediumvioletred"), 0x00c71585UL },
+ { std::u16string_view(u"midnightblue"), 0x00191970UL },
+ { std::u16string_view(u"mintcream"), 0x00f5fffaUL },
+ { std::u16string_view(u"mistyrose"), 0x00ffe4e1UL },
+ { std::u16string_view(u"moccasin"), 0x00ffe4b5UL },
+ { std::u16string_view(u"navajowhite"), 0x00ffdeadUL },
+ { std::u16string_view(u"navy"), 0x00000080UL },
+ { std::u16string_view(u"oldlace"), 0x00fdf5e6UL },
+ { std::u16string_view(u"olive"), 0x00808000UL },
+ { std::u16string_view(u"olivedrab"), 0x006b8e23UL },
+ { std::u16string_view(u"orange"), 0x00ffa500UL },
+ { std::u16string_view(u"orangered"), 0x00ff4500UL },
+ { std::u16string_view(u"orchid"), 0x00da70d6UL },
+ { std::u16string_view(u"palegoldenrod"), 0x00eee8aaUL },
+ { std::u16string_view(u"palegreen"), 0x0098fb98UL },
+ { std::u16string_view(u"paleturquoise"), 0x00afeeeeUL },
+ { std::u16string_view(u"palevioletred"), 0x00db7093UL },
+ { std::u16string_view(u"papayawhip"), 0x00ffefd5UL },
+ { std::u16string_view(u"peachpuff"), 0x00ffdab9UL },
+ { std::u16string_view(u"peru"), 0x00cd853fUL },
+ { std::u16string_view(u"pink"), 0x00ffc0cbUL },
+ { std::u16string_view(u"plum"), 0x00dda0ddUL },
+ { std::u16string_view(u"powderblue"), 0x00b0e0e6UL },
+ { std::u16string_view(u"purple"), 0x00800080UL },
+ { std::u16string_view(u"red"), 0x00ff0000UL },
+ { std::u16string_view(u"rosybrown"), 0x00bc8f8fUL },
+ { std::u16string_view(u"royalblue"), 0x004169e1UL },
+ { std::u16string_view(u"saddlebrown"), 0x008b4513UL },
+ { std::u16string_view(u"salmon"), 0x00fa8072UL },
+ { std::u16string_view(u"sandybrown"), 0x00f4a460UL },
+ { std::u16string_view(u"seagreen"), 0x002e8b57UL },
+ { std::u16string_view(u"seashell"), 0x00fff5eeUL },
+ { std::u16string_view(u"sienna"), 0x00a0522dUL },
+ { std::u16string_view(u"silver"), 0x00c0c0c0UL },
+ { std::u16string_view(u"skyblue"), 0x0087ceebUL },
+ { std::u16string_view(u"slateblue"), 0x006a5acdUL },
+ { std::u16string_view(u"slategray"), 0x00708090UL },
+ { std::u16string_view(u"snow"), 0x00fffafaUL },
+ { std::u16string_view(u"springgreen"), 0x0000ff7fUL },
+ { std::u16string_view(u"steelblue"), 0x004682b4UL },
+ { std::u16string_view(u"tan"), 0x00d2b48cUL },
+ { std::u16string_view(u"teal"), 0x00008080UL },
+ { std::u16string_view(u"thistle"), 0x00d8bfd8UL },
+ { std::u16string_view(u"tomato"), 0x00ff6347UL },
+ { std::u16string_view(u"turquoise"), 0x0040e0d0UL },
+ { std::u16string_view(u"violet"), 0x00ee82eeUL },
+ { std::u16string_view(u"wheat"), 0x00f5deb3UL },
+ { std::u16string_view(u"white"), 0x00ffffffUL },
+ { std::u16string_view(u"whitesmoke"), 0x00f5f5f5UL },
+ { std::u16string_view(u"yellow"), 0x00ffff00UL },
+ { std::u16string_view(u"yellowgreen"), 0x009acd32UL }
+};
+
+// Looks up the color value for the color name rName in aHTMLColorNameTab.
+// The name is converted to ASCII lower case first, so the lookup does not
+// depend on the case of ASCII letters. SAL_MAX_UINT32 is handed to
+// search() as its third argument (presumably the value reported for names
+// not in the table).
+sal_uInt32 GetHTMLColor( const OUString& rName )
+{
+    // The color table is const, so it is never sorted here; on the first
+    // call we merely assert that it is already in sortCompare order.
+    if( !bSortColorKeyWords )
+    {
+        assert( std::is_sorted( std::begin(aHTMLColorNameTab),
+                                std::end(aHTMLColorNameTab),
+                                sortCompare<sal_uInt32> ) );
+        bSortColorKeyWords = true;
+    }
+
+    const OUString aKey = rName.toAsciiLowerCase();
+    return search<sal_uInt32>( aHTMLColorNameTab, aKey, SAL_MAX_UINT32 );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/svtools/source/svhtml/htmlout.cxx b/svtools/source/svhtml/htmlout.cxx
new file mode 100644
index 000000000..b42e425df
--- /dev/null
+++ b/svtools/source/svhtml/htmlout.cxx
@@ -0,0 +1,1007 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <svl/numformat.hxx>
+#include <svl/zformat.hxx>
+#include <svl/macitem.hxx>
+#include <vcl/svapp.hxx>
+#include <vcl/settings.hxx>
+
+#include <svtools/HtmlWriter.hxx>
+#include <svtools/htmlout.hxx>
+#include <svtools/htmlkywd.hxx>
+#include <vcl/imap.hxx>
+#include <vcl/imaprect.hxx>
+#include <vcl/imapcirc.hxx>
+#include <vcl/imappoly.hxx>
+#include <svl/urihelper.hxx>
+#include <rtl/character.hxx>
+#include <tools/debug.hxx>
+
+#include <sstream>
+// Size in bytes of the char buffers that receive the converter output in this file
+#define TXTCONV_BUFFER_SIZE 20
+
+// Converts UTF-16 text to UTF-8 with rtl_convertUnicodeToText.
+//
+// The converter and its context are function-local statics: they are created
+// on the first call and reused by every later call, so whatever state the
+// context holds carries over from one call to the next.
+//
+// All parameters are forwarded unchanged to rtl_convertUnicodeToText and its
+// result is returned.
+static sal_Size convertUnicodeToText(const sal_Unicode* pSrcBuf, sal_Size nSrcChars, char* pDestBuf,
+                                     sal_Size nDestBytes, sal_uInt32 nFlags, sal_uInt32* pInfo,
+                                     sal_Size* pSrcCvtChars)
+{
+    static rtl_UnicodeToTextConverter hConverter
+        = rtl_createUnicodeToTextConverter(RTL_TEXTENCODING_UTF8);
+    // If no converter could be created there is nothing to create a context
+    // from; a dummy non-null handle is stored instead. The cast uses the
+    // Unicode-to-text handle type that matches the variable (the previous
+    // code named the handle type of the opposite conversion direction).
+    static rtl_UnicodeToTextContext hContext = hConverter
+        ? rtl_createUnicodeToTextContext(hConverter)
+        : reinterpret_cast<rtl_UnicodeToTextContext>(1);
+
+    return rtl_convertUnicodeToText(hConverter, hContext, pSrcBuf, nSrcChars, pDestBuf, nDestBytes,
+                                    nFlags, pInfo, pSrcCvtChars);
+}
+/* Returns the entity name macro (OOO_STRING_SVTOOLS_HTML_*) belonging to the
+ * code point c, or nullptr if none of the tables below lists c.
+ *
+ * eDestEnc influences the result in two ways:
+ *  - for RTL_TEXTENCODING_ISO_8859_2 and RTL_TEXTENCODING_MS_1250 the code
+ *    points of the first switch always yield nullptr;
+ *  - the Greek letters of the last switch are only looked up when eDestEnc
+ *    is neither RTL_TEXTENCODING_ISO_8859_7 nor RTL_TEXTENCODING_MS_1253.
+ *
+ * The caller in this file wraps a non-null result in '&' and ';'.
+ */
+static const char *lcl_svhtml_GetEntityForChar( sal_uInt32 c,
+ rtl_TextEncoding eDestEnc )
+{
+ const char* pStr = nullptr;
+
+ // Note: We currently handle special cases for ISO-8859-2 here simply because
+ // the code was already submitted. But we should also handle other code pages
+ // as well as the code becomes available.
+
+ if( eDestEnc == RTL_TEXTENCODING_ISO_8859_2 || eDestEnc == RTL_TEXTENCODING_MS_1250 )
+ {
+ // Don't handle the following characters for Eastern European (ISO-8859-2).
+ switch ( c )
+ {
+ case 164: // curren
+ case 184: // cedil
+ case 193: // Aacute
+ case 194: // Acirc
+ case 196: // Auml
+ case 199: // Ccedil
+ case 201: // Eacute
+ case 203: // Euml
+ case 205: // Iacute
+ case 206: // Icirc
+ case 211: // Oacute
+ case 212: // Ocirc
+ case 214: // Ouml
+ case 215: // times
+ case 218: // Uacute
+ case 220: // Uuml
+ case 221: // Yacute
+ case 225: // aacute
+ case 226: // acirc
+ case 228: // auml
+ case 233: // eacute
+ case 235: // euml
+ case 237: // iacute
+ case 238: // icirc
+ case 243: // oacute
+ case 244: // ocirc
+ case 246: // ouml
+ case 247: // divide
+ case 250: // uacute
+ case 252: // uuml
+ case 253: // yacute
+ case 352: // Scaron
+ case 353: // scaron
+ return pStr; // pStr is still nullptr here: no entity for these
+ }
+ }
+
+ // TODO: handle more special cases for other code pages.
+
+ switch( c )
+ {
+// case '\x0a': return HTMLOutFuncs::Out_Tag( rStream, OOO_STRING_SVTOOLS_HTML_linebreak );
+
+ case '<': pStr = OOO_STRING_SVTOOLS_HTML_C_lt; break;
+ case '>': pStr = OOO_STRING_SVTOOLS_HTML_C_gt; break;
+ case '&': pStr = OOO_STRING_SVTOOLS_HTML_C_amp; break;
+ case '"': pStr = OOO_STRING_SVTOOLS_HTML_C_quot; break;
+
+ case 161: pStr = OOO_STRING_SVTOOLS_HTML_S_iexcl; break;
+ case 162: pStr = OOO_STRING_SVTOOLS_HTML_S_cent; break;
+ case 163: pStr = OOO_STRING_SVTOOLS_HTML_S_pound; break;
+ case 164: pStr = OOO_STRING_SVTOOLS_HTML_S_curren; break;
+ case 165: pStr = OOO_STRING_SVTOOLS_HTML_S_yen; break;
+ case 166: pStr = OOO_STRING_SVTOOLS_HTML_S_brvbar; break;
+ case 167: pStr = OOO_STRING_SVTOOLS_HTML_S_sect; break;
+ case 168: pStr = OOO_STRING_SVTOOLS_HTML_S_uml; break;
+ case 169: pStr = OOO_STRING_SVTOOLS_HTML_S_copy; break;
+ case 170: pStr = OOO_STRING_SVTOOLS_HTML_S_ordf; break;
+ case 171: pStr = OOO_STRING_SVTOOLS_HTML_S_laquo; break;
+ case 172: pStr = OOO_STRING_SVTOOLS_HTML_S_not; break;
+ case 174: pStr = OOO_STRING_SVTOOLS_HTML_S_reg; break;
+ case 175: pStr = OOO_STRING_SVTOOLS_HTML_S_macr; break;
+ case 176: pStr = OOO_STRING_SVTOOLS_HTML_S_deg; break;
+ case 177: pStr = OOO_STRING_SVTOOLS_HTML_S_plusmn; break;
+ case 178: pStr = OOO_STRING_SVTOOLS_HTML_S_sup2; break;
+ case 179: pStr = OOO_STRING_SVTOOLS_HTML_S_sup3; break;
+ case 180: pStr = OOO_STRING_SVTOOLS_HTML_S_acute; break;
+ case 181: pStr = OOO_STRING_SVTOOLS_HTML_S_micro; break;
+ case 182: pStr = OOO_STRING_SVTOOLS_HTML_S_para; break;
+ case 183: pStr = OOO_STRING_SVTOOLS_HTML_S_middot; break;
+ case 184: pStr = OOO_STRING_SVTOOLS_HTML_S_cedil; break;
+ case 185: pStr = OOO_STRING_SVTOOLS_HTML_S_sup1; break;
+ case 186: pStr = OOO_STRING_SVTOOLS_HTML_S_ordm; break;
+ case 187: pStr = OOO_STRING_SVTOOLS_HTML_S_raquo; break;
+ case 188: pStr = OOO_STRING_SVTOOLS_HTML_S_frac14; break;
+ case 189: pStr = OOO_STRING_SVTOOLS_HTML_S_frac12; break;
+ case 190: pStr = OOO_STRING_SVTOOLS_HTML_S_frac34; break;
+ case 191: pStr = OOO_STRING_SVTOOLS_HTML_S_iquest; break;
+
+ case 192: pStr = OOO_STRING_SVTOOLS_HTML_C_Agrave; break;
+ case 193: pStr = OOO_STRING_SVTOOLS_HTML_C_Aacute; break;
+ case 194: pStr = OOO_STRING_SVTOOLS_HTML_C_Acirc; break;
+ case 195: pStr = OOO_STRING_SVTOOLS_HTML_C_Atilde; break;
+ case 196: pStr = OOO_STRING_SVTOOLS_HTML_C_Auml; break;
+ case 197: pStr = OOO_STRING_SVTOOLS_HTML_C_Aring; break;
+ case 198: pStr = OOO_STRING_SVTOOLS_HTML_C_AElig; break;
+ case 199: pStr = OOO_STRING_SVTOOLS_HTML_C_Ccedil; break;
+ case 200: pStr = OOO_STRING_SVTOOLS_HTML_C_Egrave; break;
+ case 201: pStr = OOO_STRING_SVTOOLS_HTML_C_Eacute; break;
+ case 202: pStr = OOO_STRING_SVTOOLS_HTML_C_Ecirc; break;
+ case 203: pStr = OOO_STRING_SVTOOLS_HTML_C_Euml; break;
+ case 204: pStr = OOO_STRING_SVTOOLS_HTML_C_Igrave; break;
+ case 205: pStr = OOO_STRING_SVTOOLS_HTML_C_Iacute; break;
+ case 206: pStr = OOO_STRING_SVTOOLS_HTML_C_Icirc; break;
+ case 207: pStr = OOO_STRING_SVTOOLS_HTML_C_Iuml; break;
+ case 208: pStr = OOO_STRING_SVTOOLS_HTML_C_ETH; break;
+ case 209: pStr = OOO_STRING_SVTOOLS_HTML_C_Ntilde; break;
+ case 210: pStr = OOO_STRING_SVTOOLS_HTML_C_Ograve; break;
+ case 211: pStr = OOO_STRING_SVTOOLS_HTML_C_Oacute; break;
+ case 212: pStr = OOO_STRING_SVTOOLS_HTML_C_Ocirc; break;
+ case 213: pStr = OOO_STRING_SVTOOLS_HTML_C_Otilde; break;
+ case 214: pStr = OOO_STRING_SVTOOLS_HTML_C_Ouml; break;
+ case 215: pStr = OOO_STRING_SVTOOLS_HTML_S_times; break;
+ case 216: pStr = OOO_STRING_SVTOOLS_HTML_C_Oslash; break;
+ case 217: pStr = OOO_STRING_SVTOOLS_HTML_C_Ugrave; break;
+ case 218: pStr = OOO_STRING_SVTOOLS_HTML_C_Uacute; break;
+ case 219: pStr = OOO_STRING_SVTOOLS_HTML_C_Ucirc; break;
+ case 220: pStr = OOO_STRING_SVTOOLS_HTML_C_Uuml; break;
+ case 221: pStr = OOO_STRING_SVTOOLS_HTML_C_Yacute; break;
+
+ case 222: pStr = OOO_STRING_SVTOOLS_HTML_C_THORN; break;
+ case 223: pStr = OOO_STRING_SVTOOLS_HTML_C_szlig; break;
+
+ case 224: pStr = OOO_STRING_SVTOOLS_HTML_S_agrave; break;
+ case 225: pStr = OOO_STRING_SVTOOLS_HTML_S_aacute; break;
+ case 226: pStr = OOO_STRING_SVTOOLS_HTML_S_acirc; break;
+ case 227: pStr = OOO_STRING_SVTOOLS_HTML_S_atilde; break;
+ case 228: pStr = OOO_STRING_SVTOOLS_HTML_S_auml; break;
+ case 229: pStr = OOO_STRING_SVTOOLS_HTML_S_aring; break;
+ case 230: pStr = OOO_STRING_SVTOOLS_HTML_S_aelig; break;
+ case 231: pStr = OOO_STRING_SVTOOLS_HTML_S_ccedil; break;
+ case 232: pStr = OOO_STRING_SVTOOLS_HTML_S_egrave; break;
+ case 233: pStr = OOO_STRING_SVTOOLS_HTML_S_eacute; break;
+ case 234: pStr = OOO_STRING_SVTOOLS_HTML_S_ecirc; break;
+ case 235: pStr = OOO_STRING_SVTOOLS_HTML_S_euml; break;
+ case 236: pStr = OOO_STRING_SVTOOLS_HTML_S_igrave; break;
+ case 237: pStr = OOO_STRING_SVTOOLS_HTML_S_iacute; break;
+ case 238: pStr = OOO_STRING_SVTOOLS_HTML_S_icirc; break;
+ case 239: pStr = OOO_STRING_SVTOOLS_HTML_S_iuml; break;
+ case 240: pStr = OOO_STRING_SVTOOLS_HTML_S_eth; break;
+ case 241: pStr = OOO_STRING_SVTOOLS_HTML_S_ntilde; break;
+ case 242: pStr = OOO_STRING_SVTOOLS_HTML_S_ograve; break;
+ case 243: pStr = OOO_STRING_SVTOOLS_HTML_S_oacute; break;
+ case 244: pStr = OOO_STRING_SVTOOLS_HTML_S_ocirc; break;
+ case 245: pStr = OOO_STRING_SVTOOLS_HTML_S_otilde; break;
+ case 246: pStr = OOO_STRING_SVTOOLS_HTML_S_ouml; break;
+ case 247: pStr = OOO_STRING_SVTOOLS_HTML_S_divide; break;
+ case 248: pStr = OOO_STRING_SVTOOLS_HTML_S_oslash; break;
+ case 249: pStr = OOO_STRING_SVTOOLS_HTML_S_ugrave; break;
+ case 250: pStr = OOO_STRING_SVTOOLS_HTML_S_uacute; break;
+ case 251: pStr = OOO_STRING_SVTOOLS_HTML_S_ucirc; break;
+ case 252: pStr = OOO_STRING_SVTOOLS_HTML_S_uuml; break;
+ case 253: pStr = OOO_STRING_SVTOOLS_HTML_S_yacute; break;
+ case 254: pStr = OOO_STRING_SVTOOLS_HTML_S_thorn; break;
+ case 255: pStr = OOO_STRING_SVTOOLS_HTML_S_yuml; break;
+
+ case 338: pStr = OOO_STRING_SVTOOLS_HTML_S_OElig; break;
+ case 339: pStr = OOO_STRING_SVTOOLS_HTML_S_oelig; break;
+ case 352: pStr = OOO_STRING_SVTOOLS_HTML_S_Scaron; break;
+ case 353: pStr = OOO_STRING_SVTOOLS_HTML_S_scaron; break;
+ case 376: pStr = OOO_STRING_SVTOOLS_HTML_S_Yuml; break;
+ case 402: pStr = OOO_STRING_SVTOOLS_HTML_S_fnof; break;
+ case 710: pStr = OOO_STRING_SVTOOLS_HTML_S_circ; break;
+ case 732: pStr = OOO_STRING_SVTOOLS_HTML_S_tilde; break;
+
+ // Greek chars are handled later,
+ // since they should *not* be transformed to entities
+ // when generating Greek text (== using Greek encoding)
+
+ case 8194: pStr = OOO_STRING_SVTOOLS_HTML_S_ensp; break;
+ case 8195: pStr = OOO_STRING_SVTOOLS_HTML_S_emsp; break;
+ case 8201: pStr = OOO_STRING_SVTOOLS_HTML_S_thinsp; break;
+ case 8204: pStr = OOO_STRING_SVTOOLS_HTML_S_zwnj; break;
+ case 8205: pStr = OOO_STRING_SVTOOLS_HTML_S_zwj; break;
+ case 8206: pStr = OOO_STRING_SVTOOLS_HTML_S_lrm; break;
+ case 8207: pStr = OOO_STRING_SVTOOLS_HTML_S_rlm; break;
+ case 8211: pStr = OOO_STRING_SVTOOLS_HTML_S_ndash; break;
+ case 8212: pStr = OOO_STRING_SVTOOLS_HTML_S_mdash; break;
+ case 8216: pStr = OOO_STRING_SVTOOLS_HTML_S_lsquo; break;
+ case 8217: pStr = OOO_STRING_SVTOOLS_HTML_S_rsquo; break;
+ case 8218: pStr = OOO_STRING_SVTOOLS_HTML_S_sbquo; break;
+ case 8220: pStr = OOO_STRING_SVTOOLS_HTML_S_ldquo; break;
+ case 8221: pStr = OOO_STRING_SVTOOLS_HTML_S_rdquo; break;
+ case 8222: pStr = OOO_STRING_SVTOOLS_HTML_S_bdquo; break;
+ case 8224: pStr = OOO_STRING_SVTOOLS_HTML_S_dagger; break;
+ case 8225: pStr = OOO_STRING_SVTOOLS_HTML_S_Dagger; break;
+ case 8226: pStr = OOO_STRING_SVTOOLS_HTML_S_bull; break;
+ case 8230: pStr = OOO_STRING_SVTOOLS_HTML_S_hellip; break;
+ case 8240: pStr = OOO_STRING_SVTOOLS_HTML_S_permil; break;
+ case 8242: pStr = OOO_STRING_SVTOOLS_HTML_S_prime; break;
+ case 8243: pStr = OOO_STRING_SVTOOLS_HTML_S_Prime; break;
+ case 8249: pStr = OOO_STRING_SVTOOLS_HTML_S_lsaquo; break;
+ case 8250: pStr = OOO_STRING_SVTOOLS_HTML_S_rsaquo; break;
+ case 8254: pStr = OOO_STRING_SVTOOLS_HTML_S_oline; break;
+ case 8260: pStr = OOO_STRING_SVTOOLS_HTML_S_frasl; break;
+ case 8364: pStr = OOO_STRING_SVTOOLS_HTML_S_euro; break;
+ case 8465: pStr = OOO_STRING_SVTOOLS_HTML_S_image; break;
+ case 8472: pStr = OOO_STRING_SVTOOLS_HTML_S_weierp; break;
+ case 8476: pStr = OOO_STRING_SVTOOLS_HTML_S_real; break;
+ case 8482: pStr = OOO_STRING_SVTOOLS_HTML_S_trade; break;
+ case 8501: pStr = OOO_STRING_SVTOOLS_HTML_S_alefsym; break;
+ case 8592: pStr = OOO_STRING_SVTOOLS_HTML_S_larr; break;
+ case 8593: pStr = OOO_STRING_SVTOOLS_HTML_S_uarr; break;
+ case 8594: pStr = OOO_STRING_SVTOOLS_HTML_S_rarr; break;
+ case 8595: pStr = OOO_STRING_SVTOOLS_HTML_S_darr; break;
+ case 8596: pStr = OOO_STRING_SVTOOLS_HTML_S_harr; break;
+ case 8629: pStr = OOO_STRING_SVTOOLS_HTML_S_crarr; break;
+ case 8656: pStr = OOO_STRING_SVTOOLS_HTML_S_lArr; break;
+ case 8657: pStr = OOO_STRING_SVTOOLS_HTML_S_uArr; break;
+ case 8658: pStr = OOO_STRING_SVTOOLS_HTML_S_rArr; break;
+ case 8659: pStr = OOO_STRING_SVTOOLS_HTML_S_dArr; break;
+ case 8660: pStr = OOO_STRING_SVTOOLS_HTML_S_hArr; break;
+ case 8704: pStr = OOO_STRING_SVTOOLS_HTML_S_forall; break;
+ case 8706: pStr = OOO_STRING_SVTOOLS_HTML_S_part; break;
+ case 8707: pStr = OOO_STRING_SVTOOLS_HTML_S_exist; break;
+ case 8709: pStr = OOO_STRING_SVTOOLS_HTML_S_empty; break;
+ case 8711: pStr = OOO_STRING_SVTOOLS_HTML_S_nabla; break;
+ case 8712: pStr = OOO_STRING_SVTOOLS_HTML_S_isin; break;
+ case 8713: pStr = OOO_STRING_SVTOOLS_HTML_S_notin; break;
+ case 8715: pStr = OOO_STRING_SVTOOLS_HTML_S_ni; break;
+ case 8719: pStr = OOO_STRING_SVTOOLS_HTML_S_prod; break;
+ case 8721: pStr = OOO_STRING_SVTOOLS_HTML_S_sum; break;
+ case 8722: pStr = OOO_STRING_SVTOOLS_HTML_S_minus; break;
+ case 8727: pStr = OOO_STRING_SVTOOLS_HTML_S_lowast; break;
+ case 8730: pStr = OOO_STRING_SVTOOLS_HTML_S_radic; break;
+ case 8733: pStr = OOO_STRING_SVTOOLS_HTML_S_prop; break;
+ case 8734: pStr = OOO_STRING_SVTOOLS_HTML_S_infin; break;
+ case 8736: pStr = OOO_STRING_SVTOOLS_HTML_S_ang; break;
+ case 8743: pStr = OOO_STRING_SVTOOLS_HTML_S_and; break;
+ case 8744: pStr = OOO_STRING_SVTOOLS_HTML_S_or; break;
+ case 8745: pStr = OOO_STRING_SVTOOLS_HTML_S_cap; break;
+ case 8746: pStr = OOO_STRING_SVTOOLS_HTML_S_cup; break;
+ case 8747: pStr = OOO_STRING_SVTOOLS_HTML_S_int; break;
+ case 8756: pStr = OOO_STRING_SVTOOLS_HTML_S_there4; break;
+ case 8764: pStr = OOO_STRING_SVTOOLS_HTML_S_sim; break;
+ case 8773: pStr = OOO_STRING_SVTOOLS_HTML_S_cong; break;
+ case 8776: pStr = OOO_STRING_SVTOOLS_HTML_S_asymp; break;
+ case 8800: pStr = OOO_STRING_SVTOOLS_HTML_S_ne; break;
+ case 8801: pStr = OOO_STRING_SVTOOLS_HTML_S_equiv; break;
+ case 8804: pStr = OOO_STRING_SVTOOLS_HTML_S_le; break;
+ case 8805: pStr = OOO_STRING_SVTOOLS_HTML_S_ge; break;
+ case 8834: pStr = OOO_STRING_SVTOOLS_HTML_S_sub; break;
+ case 8835: pStr = OOO_STRING_SVTOOLS_HTML_S_sup; break;
+ case 8836: pStr = OOO_STRING_SVTOOLS_HTML_S_nsub; break;
+ case 8838: pStr = OOO_STRING_SVTOOLS_HTML_S_sube; break;
+ case 8839: pStr = OOO_STRING_SVTOOLS_HTML_S_supe; break;
+ case 8853: pStr = OOO_STRING_SVTOOLS_HTML_S_oplus; break;
+ case 8855: pStr = OOO_STRING_SVTOOLS_HTML_S_otimes; break;
+ case 8869: pStr = OOO_STRING_SVTOOLS_HTML_S_perp; break;
+ case 8901: pStr = OOO_STRING_SVTOOLS_HTML_S_sdot; break;
+ case 8968: pStr = OOO_STRING_SVTOOLS_HTML_S_lceil; break;
+ case 8969: pStr = OOO_STRING_SVTOOLS_HTML_S_rceil; break;
+ case 8970: pStr = OOO_STRING_SVTOOLS_HTML_S_lfloor; break;
+ case 8971: pStr = OOO_STRING_SVTOOLS_HTML_S_rfloor; break;
+ case 9001: pStr = OOO_STRING_SVTOOLS_HTML_S_lang; break;
+ case 9002: pStr = OOO_STRING_SVTOOLS_HTML_S_rang; break;
+ case 9674: pStr = OOO_STRING_SVTOOLS_HTML_S_loz; break;
+ case 9824: pStr = OOO_STRING_SVTOOLS_HTML_S_spades; break;
+ case 9827: pStr = OOO_STRING_SVTOOLS_HTML_S_clubs; break;
+ case 9829: pStr = OOO_STRING_SVTOOLS_HTML_S_hearts; break;
+ case 9830: pStr = OOO_STRING_SVTOOLS_HTML_S_diams; break;
+ }
+
+ // Greek chars: if we do not produce a Greek encoding,
+ // transform them into entities
+ if( !pStr &&
+ ( eDestEnc != RTL_TEXTENCODING_ISO_8859_7 ) &&
+ ( eDestEnc != RTL_TEXTENCODING_MS_1253 ) )
+ {
+ switch( c )
+ {
+ case 913: pStr = OOO_STRING_SVTOOLS_HTML_S_Alpha; break;
+ case 914: pStr = OOO_STRING_SVTOOLS_HTML_S_Beta; break;
+ case 915: pStr = OOO_STRING_SVTOOLS_HTML_S_Gamma; break;
+ case 916: pStr = OOO_STRING_SVTOOLS_HTML_S_Delta; break;
+ case 917: pStr = OOO_STRING_SVTOOLS_HTML_S_Epsilon; break;
+ case 918: pStr = OOO_STRING_SVTOOLS_HTML_S_Zeta; break;
+ case 919: pStr = OOO_STRING_SVTOOLS_HTML_S_Eta; break;
+ case 920: pStr = OOO_STRING_SVTOOLS_HTML_S_Theta; break;
+ case 921: pStr = OOO_STRING_SVTOOLS_HTML_S_Iota; break;
+ case 922: pStr = OOO_STRING_SVTOOLS_HTML_S_Kappa; break;
+ case 923: pStr = OOO_STRING_SVTOOLS_HTML_S_Lambda; break;
+ case 924: pStr = OOO_STRING_SVTOOLS_HTML_S_Mu; break;
+ case 925: pStr = OOO_STRING_SVTOOLS_HTML_S_Nu; break;
+ case 926: pStr = OOO_STRING_SVTOOLS_HTML_S_Xi; break;
+ case 927: pStr = OOO_STRING_SVTOOLS_HTML_S_Omicron; break;
+ case 928: pStr = OOO_STRING_SVTOOLS_HTML_S_Pi; break;
+ case 929: pStr = OOO_STRING_SVTOOLS_HTML_S_Rho; break;
+ case 931: pStr = OOO_STRING_SVTOOLS_HTML_S_Sigma; break;
+ case 932: pStr = OOO_STRING_SVTOOLS_HTML_S_Tau; break;
+ case 933: pStr = OOO_STRING_SVTOOLS_HTML_S_Upsilon; break;
+ case 934: pStr = OOO_STRING_SVTOOLS_HTML_S_Phi; break;
+ case 935: pStr = OOO_STRING_SVTOOLS_HTML_S_Chi; break;
+ case 936: pStr = OOO_STRING_SVTOOLS_HTML_S_Psi; break;
+ case 937: pStr = OOO_STRING_SVTOOLS_HTML_S_Omega; break;
+ case 945: pStr = OOO_STRING_SVTOOLS_HTML_S_alpha; break;
+ case 946: pStr = OOO_STRING_SVTOOLS_HTML_S_beta; break;
+ case 947: pStr = OOO_STRING_SVTOOLS_HTML_S_gamma; break;
+ case 948: pStr = OOO_STRING_SVTOOLS_HTML_S_delta; break;
+ case 949: pStr = OOO_STRING_SVTOOLS_HTML_S_epsilon; break;
+ case 950: pStr = OOO_STRING_SVTOOLS_HTML_S_zeta; break;
+ case 951: pStr = OOO_STRING_SVTOOLS_HTML_S_eta; break;
+ case 952: pStr = OOO_STRING_SVTOOLS_HTML_S_theta; break;
+ case 953: pStr = OOO_STRING_SVTOOLS_HTML_S_iota; break;
+ case 954: pStr = OOO_STRING_SVTOOLS_HTML_S_kappa; break;
+ case 955: pStr = OOO_STRING_SVTOOLS_HTML_S_lambda; break;
+ case 956: pStr = OOO_STRING_SVTOOLS_HTML_S_mu; break;
+ case 957: pStr = OOO_STRING_SVTOOLS_HTML_S_nu; break;
+ case 958: pStr = OOO_STRING_SVTOOLS_HTML_S_xi; break;
+ case 959: pStr = OOO_STRING_SVTOOLS_HTML_S_omicron; break;
+ case 960: pStr = OOO_STRING_SVTOOLS_HTML_S_pi; break;
+ case 961: pStr = OOO_STRING_SVTOOLS_HTML_S_rho; break;
+ case 962: pStr = OOO_STRING_SVTOOLS_HTML_S_sigmaf; break;
+ case 963: pStr = OOO_STRING_SVTOOLS_HTML_S_sigma; break;
+ case 964: pStr = OOO_STRING_SVTOOLS_HTML_S_tau; break;
+ case 965: pStr = OOO_STRING_SVTOOLS_HTML_S_upsilon; break;
+ case 966: pStr = OOO_STRING_SVTOOLS_HTML_S_phi; break;
+ case 967: pStr = OOO_STRING_SVTOOLS_HTML_S_chi; break;
+ case 968: pStr = OOO_STRING_SVTOOLS_HTML_S_psi; break;
+ case 969: pStr = OOO_STRING_SVTOOLS_HTML_S_omega; break;
+ case 977: pStr = OOO_STRING_SVTOOLS_HTML_S_thetasym;break;
+ case 978: pStr = OOO_STRING_SVTOOLS_HTML_S_upsih; break;
+ case 982: pStr = OOO_STRING_SVTOOLS_HTML_S_piv; break;
+ }
+ }
+
+ return pStr;
+}
+
+// Runs the shared converter once more without any input and with
+// RTL_UNICODETOTEXT_FLAGS_FLUSH added to nFlags. Whatever the converter
+// emits is stored in pBuffer (announced to the converter as
+// TXTCONV_BUFFER_SIZE bytes); the number of bytes stored is returned.
+static sal_Size lcl_FlushContext(char* pBuffer, sal_uInt32 nFlags)
+{
+    const sal_uInt32 nFlushFlags = nFlags | RTL_UNICODETOTEXT_FLAGS_FLUSH;
+
+    sal_uInt32 nStatus = 0;
+    sal_Size nConsumed; // demanded by the converter interface, not evaluated here
+    const sal_Size nWritten = convertUnicodeToText(nullptr, 0,
+                                                   pBuffer, TXTCONV_BUFFER_SIZE,
+                                                   nFlushFlags, &nStatus, &nConsumed);
+
+    // Neither a conversion error nor an overflowing buffer is expected here.
+    DBG_ASSERT((nStatus & (RTL_UNICODETOTEXT_INFO_ERROR|RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) == 0, "HTMLOut: error while flushing");
+    return nWritten;
+}
+
+// Produces the HTML text for one Unicode code point.
+//
+// The result is one of:
+//  - "&name;" for 0xA0, 0x2011, 0xAD and for those code points below 128
+//    that lcl_svhtml_GetEntityForChar knows,
+//  - the bytes delivered by the UTF-8 converter,
+//  - "&#<decimal>;" if the converter delivered nothing or signalled an
+//    error; in that case the code point is also added to
+//    *pNonConvertableChars (if given) unless it is already contained.
+// Before an entity is written, bytes still pending in the converter are
+// flushed into the result.
+static OString lcl_ConvertCharToHTML( sal_uInt32 c,
+                                      OUString *pNonConvertableChars )
+{
+    assert(rtl::isUnicodeCodePoint(c));
+
+    const sal_uInt32 nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE|
+                              RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE|
+                              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR|
+                              RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR;
+
+    OStringBuffer aDest;
+    char cBuffer[TXTCONV_BUFFER_SIZE];
+
+    // Copies the first nCount bytes of cBuffer to the result.
+    const auto appendFromBuffer = [&aDest, &cBuffer](sal_Size nCount)
+    {
+        for( sal_Size nPos = 0; nPos < nCount; ++nPos )
+            aDest.append(cBuffer[nPos]);
+    };
+
+    const char* pEntity = nullptr;
+    if( c == 0xA0 )         // is a hard blank
+        pEntity = OOO_STRING_SVTOOLS_HTML_S_nbsp;
+    else if( c == 0x2011 )  // is a hard hyphen
+        pEntity = "#8209";
+    else if( c == 0xAD )    // is a soft hyphen
+        pEntity = OOO_STRING_SVTOOLS_HTML_S_shy;
+    else if( c < 128 )
+    {
+        // There may be an entity for the character.
+        // The new HTML4 entities above 255 are not used for UTF-8,
+        // because Netscape 4 does support UTF-8 but does not support
+        // these entities.
+        pEntity = lcl_svhtml_GetEntityForChar( c, RTL_TEXTENCODING_UTF8 );
+    }
+
+    if( pEntity )
+    {
+        const sal_Size nFlushed = lcl_FlushContext(cBuffer, nFlags);
+        appendFromBuffer( nFlushed );
+        aDest.append('&').append(pEntity).append(';');
+        return aDest.makeStringAndClear();
+    }
+
+    sal_uInt32 nInfo = 0;
+    sal_Size nSrcChars;
+
+    sal_Unicode utf16[2];
+    auto n = rtl::splitSurrogates(c, utf16);
+    const sal_Size nConverted = convertUnicodeToText(utf16, n,
+                                                     cBuffer, TXTCONV_BUFFER_SIZE,
+                                                     nFlags, &nInfo, &nSrcChars);
+    const bool bFailed
+        = (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR|RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) != 0;
+    if( nConverted > 0 && !bFailed )
+    {
+        appendFromBuffer( nConverted );
+        return aDest.makeStringAndClear();
+    }
+
+    // If the character could not be converted to the destination
+    // character set, the UNICODE character is exported as character
+    // entity.
+    // coverity[callee_ptr_arith] - its ok
+    const sal_Size nFlushed = lcl_FlushContext(cBuffer, nFlags);
+    appendFromBuffer( nFlushed );
+
+    // Unicode code points guaranteed to fit into sal_Int32
+    aDest.append('&').append('#').append(static_cast<sal_Int32>(c)).append(';');
+
+    if( pNonConvertableChars )
+    {
+        OUString cs(&c, 1);
+        if( pNonConvertableChars->indexOf( cs ) == -1 )
+            (*pNonConvertableChars) += cs;
+    }
+    return aDest.makeStringAndClear();
+}
+
+// Flushes the shared converter and returns the bytes it emitted as a string
+// (empty if nothing was emitted).
+static OString lcl_FlushToAscii()
+{
+    const sal_uInt32 nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE|
+                              RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE|
+                              RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR|
+                              RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR;
+
+    char cBuffer[TXTCONV_BUFFER_SIZE];
+    const sal_Size nPending = lcl_FlushContext(cBuffer, nFlags);
+
+    OStringBuffer aDest;
+    for( sal_Size nPos = 0; nPos < nPending; ++nPos )
+        aDest.append(cBuffer[nPos]);
+    return aDest.makeStringAndClear();
+}
+
+// Converts rSrc code point by code point with lcl_ConvertCharToHTML and
+// appends whatever a final flush of the converter delivers.
+// pNonConvertableChars is handed through to lcl_ConvertCharToHTML.
+OString HTMLOutFuncs::ConvertStringToHTML( const OUString& rSrc,
+                                           OUString *pNonConvertableChars )
+{
+    OStringBuffer aDest;
+
+    const sal_Int32 nEnd = rSrc.getLength();
+    sal_Int32 nPos = 0;
+    while( nPos < nEnd )
+    {
+        // iterateCodePoints advances nPos past the code point it returns
+        const sal_uInt32 nCodePoint = rSrc.iterateCodePoints(&nPos);
+        aDest.append(lcl_ConvertCharToHTML(nCodePoint, pNonConvertableChars));
+    }
+
+    aDest.append(lcl_FlushToAscii());
+    return aDest.makeStringAndClear();
+}
+
+// Writes "<rStr>" if bOn is true and "</rStr>" otherwise; rStr is written
+// as it is. Returns rStream.
+SvStream& HTMLOutFuncs::Out_AsciiTag( SvStream& rStream, std::string_view rStr,
+                                      bool bOn )
+{
+    const char* pOpening = bOn ? "<" : "</";
+    rStream.WriteCharPtr(pOpening);
+    rStream.WriteOString(rStr).WriteChar('>');
+    return rStream;
+}
+
+// Writes the HTML text that lcl_ConvertCharToHTML produces for the code
+// point c to rStream and returns rStream.
+SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_uInt32 c,
+                                  OUString *pNonConvertableChars )
+{
+    const OString sConverted( lcl_ConvertCharToHTML( c, pNonConvertableChars ) );
+    rStream.WriteOString( sConverted );
+    return rStream;
+}
+
+// Writes rOUStr to rStream, one code point at a time through Out_Char,
+// followed by a flush of the converter. Returns rStream.
+SvStream& HTMLOutFuncs::Out_String( SvStream& rStream, const OUString& rOUStr,
+                                    OUString *pNonConvertableChars )
+{
+    const sal_Int32 nEnd = rOUStr.getLength();
+    sal_Int32 nIndex = 0;
+    while( nIndex < nEnd )
+    {
+        // iterateCodePoints moves nIndex behind the code point it returns
+        const sal_uInt32 nCodePoint = rOUStr.iterateCodePoints(&nIndex);
+        HTMLOutFuncs::Out_Char( rStream, nCodePoint, pNonConvertableChars );
+    }
+
+    HTMLOutFuncs::FlushToAscii( rStream );
+    return rStream;
+}
+
+// Flushes the converter and writes the resulting bytes, if there are any,
+// to rStream. Returns rStream.
+SvStream& HTMLOutFuncs::FlushToAscii( SvStream& rStream )
+{
+    const OString sPending = lcl_FlushToAscii();
+    if (sPending.isEmpty())
+        return rStream;
+
+    rStream.WriteOString( sPending );
+    return rStream;
+}
+
+// Writes the nLen least significant hex digits of nHex (lower case, with
+// leading zeros) to rStream. nLen is limited to 16, the number of digits
+// the local buffer can hold.
+SvStream& HTMLOutFuncs::Out_Hex( SvStream& rStream, sal_uInt32 nHex, sal_uInt8 nLen )
+{
+    char aNToABuf[] = "0000000000000000";
+    const sal_uInt8 nMaxDigits = sizeof(aNToABuf) - 1;
+
+    DBG_ASSERT( nLen < sizeof(aNToABuf), "too many places" );
+    if( nLen > nMaxDigits )
+        nLen = nMaxDigits;
+
+    static const char aDigits[] = "0123456789abcdef";
+
+    // Fill in the digits from right to left, starting just before the
+    // terminating NUL; pFirst ends up on the most significant digit.
+    char* pFirst = aNToABuf + nMaxDigits;
+    for( sal_uInt8 nLeft = nLen; nLeft > 0; --nLeft )
+    {
+        --pFirst;
+        *pFirst = aDigits[nHex & 0xf];
+        nHex >>= 4;
+    }
+    return rStream.WriteCharPtr( pFirst );
+}
+
+
+// Writes rColor as a quoted value: "#rrggbb", or "color: #rrggbb" if bXHTML
+// is set. COL_AUTO is written as 000000. Returns rStream.
+SvStream& HTMLOutFuncs::Out_Color( SvStream& rStream, const Color& rColor, bool bXHTML )
+{
+    const bool bAutomatic = ( rColor == COL_AUTO );
+
+    rStream.WriteCharPtr( "\"" );
+    if (bXHTML)
+        rStream.WriteCharPtr( "color: " );
+    rStream.WriteCharPtr( "#" );
+
+    if( !bAutomatic )
+    {
+        // two hex digits per channel, in the order red, green, blue
+        const sal_uInt8 aChannels[] = { rColor.GetRed(), rColor.GetGreen(), rColor.GetBlue() };
+        for( const sal_uInt8 nChannel : aChannels )
+            Out_Hex( rStream, nChannel, 2 );
+    }
+    else
+    {
+        rStream.WriteCharPtr( "000000" );
+    }
+
+    rStream.WriteChar( '\"' );
+    return rStream;
+}
+/* Writes the image map rIMap to rStream: an opening tag built from
+ * OOO_STRING_SVTOOLS_HTML_map with a name attribute, one
+ * OOO_STRING_SVTOOLS_HTML_area tag for every rectangle, circle or polygon
+ * object of the map, and the matching closing tag.
+ *
+ * rBaseURL              URLs of active objects are made relative to it with
+ *                       URIHelper::simpleNormalizedMakeRelative
+ * rName                 name to write; if empty, rIMap.GetName() is used. If
+ *                       both are empty nothing is written at all.
+ * pEventTable,
+ * bOutStarBasic         passed to Out_Events for objects with a non-empty
+ *                       macro table; no events are written if pEventTable is
+ *                       null
+ * pDelim                if non-null, written before every area tag and before
+ *                       the closing tag
+ * pIndentArea           if non-null, written after pDelim before an area tag
+ * pIndentMap            if non-null, written after pDelim before the closing tag
+ * pNonConvertableChars  passed on to Out_String / Out_Events
+ *
+ * Returns rStream.
+ */
+SvStream& HTMLOutFuncs::Out_ImageMap( SvStream& rStream,
+ const OUString& rBaseURL,
+ const ImageMap& rIMap,
+ const OUString& rName,
+ const HTMLOutEvent *pEventTable,
+ bool bOutStarBasic,
+ const char *pDelim,
+ const char *pIndentArea,
+ const char *pIndentMap,
+ OUString *pNonConvertableChars )
+{
+ const OUString& rOutName = !rName.isEmpty() ? rName : rIMap.GetName(); // an explicit rName takes precedence
+ DBG_ASSERT( !rOutName.isEmpty(), "No ImageMap-Name" );
+ if( rOutName.isEmpty() )
+ return rStream;
+ // opening tag; the name itself goes through Out_String
+ OStringBuffer sOut;
+ sOut.append(OString::Concat("<") +
+ OOO_STRING_SVTOOLS_HTML_map
+ " "
+ OOO_STRING_SVTOOLS_HTML_O_name
+ "=\"");
+ rStream.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStream, rOutName, pNonConvertableChars );
+ rStream.WriteCharPtr( "\">" );
+ // one area tag per object of a known type
+ for( size_t i=0; i<rIMap.GetIMapObjectCount(); i++ )
+ {
+ const IMapObject* pObj = rIMap.GetIMapObject( i );
+ DBG_ASSERT( pObj, "Where is the ImageMap-Object?" );
+
+ if( pObj )
+ {
+ const char *pShape = nullptr;
+ OString aCoords;
+ switch( pObj->GetType() )
+ {
+ case IMapObjectType::Rectangle:
+ {
+ const IMapRectangleObject* pRectObj =
+ static_cast<const IMapRectangleObject *>(pObj);
+ pShape = OOO_STRING_SVTOOLS_HTML_SH_rect;
+ tools::Rectangle aRect( pRectObj->GetRectangle() );
+ // coordinates: left,top,right,bottom
+ aCoords = OStringBuffer()
+ .append(static_cast<sal_Int32>(aRect.Left()))
+ .append(',')
+ .append(static_cast<sal_Int32>(aRect.Top()))
+ .append(',')
+ .append(static_cast<sal_Int32>(aRect.Right()))
+ .append(',')
+ .append(static_cast<sal_Int32>(aRect.Bottom()))
+ .makeStringAndClear();
+ }
+ break;
+ case IMapObjectType::Circle:
+ {
+ const IMapCircleObject* pCirc =
+ static_cast<const IMapCircleObject *>(pObj);
+ pShape= OOO_STRING_SVTOOLS_HTML_SH_circ;
+ Point aCenter( pCirc->GetCenter() );
+ tools::Long nOff = pCirc->GetRadius();
+ // coordinates: center x,center y,radius
+ aCoords = OStringBuffer()
+ .append(static_cast<sal_Int32>(aCenter.X()))
+ .append(',')
+ .append(static_cast<sal_Int32>(aCenter.Y()))
+ .append(',')
+ .append(static_cast<sal_Int32>(nOff))
+ .makeStringAndClear();
+ }
+ break;
+ case IMapObjectType::Polygon:
+ {
+ const IMapPolygonObject* pPolyObj =
+ static_cast<const IMapPolygonObject *>(pObj);
+ pShape= OOO_STRING_SVTOOLS_HTML_SH_poly;
+ tools::Polygon aPoly( pPolyObj->GetPolygon() );
+ sal_uInt16 nCount = aPoly.GetSize();
+ OStringBuffer aTmpBuf;
+ if( nCount>0 ) // first point without a leading comma
+ {
+ const Point& rPoint = aPoly[0];
+ aTmpBuf.append(static_cast<sal_Int32>(rPoint.X()))
+ .append(',')
+ .append(static_cast<sal_Int32>(rPoint.Y()));
+ }
+ for( sal_uInt16 j=1; j<nCount; j++ )
+ {
+ const Point& rPoint = aPoly[j];
+ aTmpBuf.append(',')
+ .append(static_cast<sal_Int32>(rPoint.X()))
+ .append(',')
+ .append(static_cast<sal_Int32>(rPoint.Y()));
+ }
+ aCoords = aTmpBuf.makeStringAndClear();
+ }
+ break;
+ default:
+ DBG_ASSERT( pShape, "unknown IMapObject" );
+ break;
+ }
+ // pShape is still null for an unknown object type; such objects are skipped
+ if( pShape )
+ {
+ if( pDelim )
+ rStream.WriteCharPtr( pDelim );
+ if( pIndentArea )
+ rStream.WriteCharPtr( pIndentArea );
+
+ sOut.append(OString::Concat("<") + OOO_STRING_SVTOOLS_HTML_area
+ " " OOO_STRING_SVTOOLS_HTML_O_shape
+ "=" + pShape + " "
+ OOO_STRING_SVTOOLS_HTML_O_coords "=\"" +
+ aCoords + "\" ");
+ rStream.WriteOString( sOut.makeStringAndClear() );
+ // the URL is only written for active objects that have one; all others get the nohref keyword
+ OUString aURL( pObj->GetURL() );
+ if( !aURL.isEmpty() && pObj->IsActive() )
+ {
+ aURL = URIHelper::simpleNormalizedMakeRelative(
+ rBaseURL, aURL );
+ sOut.append(OOO_STRING_SVTOOLS_HTML_O_href "=\"");
+ rStream.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStream, aURL, pNonConvertableChars ).WriteChar( '\"' );
+ }
+ else
+ rStream.WriteCharPtr( OOO_STRING_SVTOOLS_HTML_O_nohref );
+
+ const OUString& rObjName = pObj->GetName();
+ if( !rObjName.isEmpty() )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_name "=\"");
+ rStream.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStream, rObjName, pNonConvertableChars ).WriteChar( '\"' );
+ }
+
+ const OUString& rTarget = pObj->GetTarget();
+ if( !rTarget.isEmpty() && pObj->IsActive() )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_target "=\"");
+ rStream.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStream, rTarget, pNonConvertableChars ).WriteChar( '\"' );
+ }
+ // alternative text, falling back to the description if there is none
+ OUString rDesc( pObj->GetAltText() );
+ if( rDesc.isEmpty() )
+ rDesc = pObj->GetDesc();
+
+ if( !rDesc.isEmpty() )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_alt "=\"");
+ rStream.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStream, rDesc, pNonConvertableChars ).WriteChar( '\"' );
+ }
+
+ const SvxMacroTableDtor& rMacroTab = pObj->GetMacroTable();
+ if( pEventTable && !rMacroTab.empty() )
+ Out_Events( rStream, rMacroTab, pEventTable,
+ bOutStarBasic, pNonConvertableChars );
+
+ rStream.WriteChar( '>' );
+ }
+ }
+
+ }
+ // closing tag
+ if( pDelim )
+ rStream.WriteCharPtr( pDelim );
+ if( pIndentMap )
+ rStream.WriteCharPtr( pIndentMap );
+ Out_AsciiTag( rStream, OOO_STRING_SVTOOLS_HTML_map, false );
+
+ return rStream;
+}
+/* Writes a script element (tag name OOO_STRING_SVTOOLS_HTML_script) to rStrm.
+ *
+ * rBaseURL              rSrc is made relative to it with
+ *                       URIHelper::simpleNormalizedMakeRelative
+ * rSource               script text; written UTF-8 encoded, not escaped
+ * rLanguage             value of the language attribute; omitted if empty
+ * eScriptType           controls where library/module names go and which
+ *                       comment markers surround the script text
+ * rSrc                  value of the src attribute; omitted if empty
+ * pSBLibrary,
+ * pSBModule             optional library / module name. For script types
+ *                       other than STARBASIC they become attributes; for
+ *                       STARBASIC they are written as lines starting with an
+ *                       apostrophe in front of the script text.
+ * pNonConvertableChars  passed on to Out_String
+ *
+ * Returns rStrm.
+ */
+SvStream& HTMLOutFuncs::OutScript( SvStream& rStrm,
+ const OUString& rBaseURL,
+ std::u16string_view rSource,
+ const OUString& rLanguage,
+ ScriptType eScriptType,
+ const OUString& rSrc,
+ const OUString *pSBLibrary,
+ const OUString *pSBModule,
+ OUString *pNonConvertableChars )
+{
+ // script is not indented!
+ OStringBuffer sOut;
+ sOut.append('<')
+ .append(OOO_STRING_SVTOOLS_HTML_script);
+ // for every attribute: write the buffered text, then the escaped value; the closing quote stays in the buffer
+ if( !rLanguage.isEmpty() )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_language "=\"");
+ rStrm.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStrm, rLanguage, pNonConvertableChars );
+ sOut.append('\"');
+ }
+
+ if( !rSrc.isEmpty() )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_src "=\"");
+ rStrm.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStrm, URIHelper::simpleNormalizedMakeRelative(rBaseURL, rSrc), pNonConvertableChars );
+ sOut.append('\"');
+ }
+
+ if( STARBASIC != eScriptType && pSBLibrary )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_sdlibrary "=\"");
+ rStrm.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStrm, *pSBLibrary, pNonConvertableChars );
+ sOut.append('\"');
+ }
+
+ if( STARBASIC != eScriptType && pSBModule )
+ {
+ sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_sdmodule "=\"");
+ rStrm.WriteOString( sOut.makeStringAndClear() );
+ Out_String( rStrm, *pSBModule, pNonConvertableChars );
+ sOut.append('\"');
+ }
+
+ sOut.append('>');
+
+ rStrm.WriteOString( sOut.makeStringAndClear() );
+ // a body is only written if there is script text or a library/module name
+ if( !rSource.empty() || pSBLibrary || pSBModule )
+ {
+ rStrm.WriteCharPtr( SAL_NEWLINE_STRING );
+
+ if( JAVASCRIPT != eScriptType )
+ {
+ rStrm.WriteCharPtr( "<!--" )
+ .WriteCharPtr( SAL_NEWLINE_STRING );
+ }
+
+ if( STARBASIC == eScriptType )
+ {
+ if( pSBLibrary )
+ {
+ sOut.append("' " OOO_STRING_SVTOOLS_HTML_SB_library " " +
+ OUStringToOString(*pSBLibrary, RTL_TEXTENCODING_UTF8));
+ rStrm.WriteOString( sOut.makeStringAndClear() ).WriteCharPtr( SAL_NEWLINE_STRING );
+ }
+
+ if( pSBModule )
+ {
+ sOut.append("' " OOO_STRING_SVTOOLS_HTML_SB_module " " +
+ OUStringToOString(*pSBModule, RTL_TEXTENCODING_UTF8));
+ rStrm.WriteOString( sOut.makeStringAndClear() ).WriteCharPtr( SAL_NEWLINE_STRING );
+ }
+ }
+
+ if( !rSource.empty() )
+ {
+ // we write the module UTF-8 encoded, but with
+ // the system new line.
+ const OString sSource(OUStringToOString(rSource, RTL_TEXTENCODING_UTF8));
+ rStrm.WriteOString( sSource ).WriteCharPtr( SAL_NEWLINE_STRING );
+ }
+ rStrm.WriteCharPtr( SAL_NEWLINE_STRING );
+
+ if( JAVASCRIPT != eScriptType )
+ {
+ // MIB/MM: if it is not StarBasic, a // could be wrong.
+ // As the comment is removed during reading, it is not helping us...
+ rStrm.WriteCharPtr( STARBASIC == eScriptType ? "' -->" : "// -->" )
+ .WriteCharPtr( SAL_NEWLINE_STRING );
+ }
+ }
+
+ HTMLOutFuncs::Out_AsciiTag( rStrm, OOO_STRING_SVTOOLS_HTML_script, false );
+
+ return rStrm;
+}
+
+
+// Writes one attribute of the form  name="macro name"  (with a leading blank)
+// for every entry of pEventTable whose event has a macro in rMacroTable.
+//
+// pEventTable is read up to the first entry in which both pBasicName and
+// pJavaName are null. StarBasic macros use pBasicName, all others pJavaName;
+// an entry is skipped if the name in question is null. Macros that are not
+// JavaScript are only written if bOutStarBasic is set. The macro name is
+// written through Out_String. Returns rStrm.
+SvStream& HTMLOutFuncs::Out_Events( SvStream& rStrm,
+                                    const SvxMacroTableDtor& rMacroTable,
+                                    const HTMLOutEvent *pEventTable,
+                                    bool bOutStarBasic,
+                                    OUString *pNonConvertableChars )
+{
+    for( sal_uInt16 nIdx = 0;
+         pEventTable[nIdx].pBasicName || pEventTable[nIdx].pJavaName;
+         ++nIdx )
+    {
+        const HTMLOutEvent& rEntry = pEventTable[nIdx];
+
+        const SvxMacro* pMacro = rMacroTable.Get( rEntry.nEvent );
+        if( !pMacro || !pMacro->HasMacro() )
+            continue;
+        if( JAVASCRIPT != pMacro->GetScriptType() && !bOutStarBasic )
+            continue;
+
+        const char* pAttrName = rEntry.pJavaName;
+        if( STARBASIC == pMacro->GetScriptType() )
+            pAttrName = rEntry.pBasicName;
+        if( !pAttrName )
+            continue;
+
+        const OString sAttrStart = OString::Concat(" ") + pAttrName + "=\"";
+        rStrm.WriteOString( sAttrStart );
+        Out_String( rStrm, pMacro->GetMacName(), pNonConvertableChars ).WriteChar( '\"' );
+    }
+
+    return rStrm;
+}
+
+// Builds the attribute text describing a numeric table cell.
+//
+// If bValue is set, an OOO_STRING_SVTOOLS_HTML_O_SDval attribute with the
+// input line string of fVal is produced. If bValue is set or nFormat is not
+// 0, an OOO_STRING_SVTOOLS_HTML_O_SDnum attribute follows: the application
+// language as a number and a ';', and for nFormat != 0 additionally the
+// language of the format, a ';' and the HTML-converted format string (the
+// latter two taken from the formatter entry; LANGUAGE_SYSTEM and an empty
+// string if there is no entry). Returns an empty string if neither applies.
+OString HTMLOutFuncs::CreateTableDataOptionsValNum(
+    bool bValue,
+    double fVal, sal_uInt32 nFormat, SvNumberFormatter& rFormatter,
+    OUString* pNonConvertableChars)
+{
+    OStringBuffer aStrTD;
+
+    if ( bValue )
+    {
+        // printf / scanf is not precise enough
+        OUString aValStr;
+        rFormatter.GetInputLineString( fVal, 0, aValStr );
+        const OString sValue(OUStringToOString(aValStr, RTL_TEXTENCODING_UTF8));
+        aStrTD.append(" " OOO_STRING_SVTOOLS_HTML_O_SDval "=\"" +
+                      sValue + "\"");
+    }
+
+    const bool bHasFormat = nFormat != 0;
+    if ( !bValue && !bHasFormat )
+        return aStrTD.makeStringAndClear();
+
+    aStrTD.append(" " OOO_STRING_SVTOOLS_HTML_O_SDnum "=\"" +
+                  OString::number(static_cast<sal_uInt16>(
+                      Application::GetSettings().GetLanguageTag().getLanguageType())) +
+                  ";"); // Language for Format 0
+
+    if ( bHasFormat )
+    {
+        OString aNumStr;
+        LanguageType nLang = LANGUAGE_SYSTEM;
+        if ( const SvNumberformat* pFormatEntry = rFormatter.GetEntry( nFormat ) )
+        {
+            aNumStr = ConvertStringToHTML( pFormatEntry->GetFormatstring(),
+                                           pNonConvertableChars );
+            nLang = pFormatEntry->GetLanguage();
+        }
+        aStrTD.append(static_cast<sal_Int32>(static_cast<sal_uInt16>(nLang)));
+        aStrTD.append(';');
+        aStrTD.append(aNumStr);
+    }
+
+    aStrTD.append('\"');
+    return aStrTD.makeStringAndClear();
+}
+
+// If rURL starts with OOO_STRING_SVTOOLS_HTML_private_image, that prefix is
+// removed from rURL and true is returned. Otherwise rURL is left alone and
+// false is returned.
+bool HTMLOutFuncs::PrivateURLToInternalImg( OUString& rURL )
+{
+    const bool bHasPrefix = rURL.startsWith(OOO_STRING_SVTOOLS_HTML_private_image);
+    if( bHasPrefix )
+        rURL = rURL.copy( strlen(OOO_STRING_SVTOOLS_HTML_private_image) );
+
+    return bHasPrefix;
+}
+
+// Adds the attribute aAttributeName to rHtmlWriter with rColor as its value,
+// written as '#' followed by the colour's RGB value as hex number, padded
+// with zeros to at least six digits. COL_AUTO is written as "#000000".
+void HtmlWriterHelper::applyColor(HtmlWriter& rHtmlWriter, std::string_view aAttributeName, const Color& rColor)
+{
+    OStringBuffer sBuffer;
+
+    if( !(rColor == COL_AUTO) )
+    {
+        sBuffer.append('#');
+
+        // right-aligned, zero-filled, width 6, hexadecimal
+        std::ostringstream sStringStream;
+        sStringStream.setf(std::ios_base::right, std::ios_base::adjustfield);
+        sStringStream.fill('0');
+        sStringStream.width(6);
+        sStringStream.setf(std::ios_base::hex, std::ios_base::basefield);
+        sStringStream << sal_uInt32(rColor.GetRGBColor());
+
+        sBuffer.append(sStringStream.str().c_str());
+    }
+    else
+    {
+        sBuffer.append("#000000");
+    }
+
+    rHtmlWriter.attribute(aAttributeName, sBuffer.makeStringAndClear());
+}
+
+
+// Adds one attribute to rHtmlWriter for every entry of pEventTable whose
+// event has a macro in rMacroTable; the attribute value is the macro name
+// encoded as UTF-8.
+//
+// pEventTable is read up to the first entry in which both pBasicName and
+// pJavaName are null. StarBasic macros use pBasicName as attribute name, all
+// others pJavaName; an entry is skipped if that name is null. Macros that
+// are not JavaScript are only written if bOutStarBasic is set.
+void HtmlWriterHelper::applyEvents(HtmlWriter& rHtmlWriter, const SvxMacroTableDtor& rMacroTable, const HTMLOutEvent* pEventTable, bool bOutStarBasic)
+{
+    for (sal_uInt16 nIdx = 0;
+         pEventTable[nIdx].pBasicName || pEventTable[nIdx].pJavaName;
+         ++nIdx)
+    {
+        const HTMLOutEvent& rEntry = pEventTable[nIdx];
+
+        const SvxMacro* pMacro = rMacroTable.Get(rEntry.nEvent);
+        if (!pMacro || !pMacro->HasMacro())
+            continue;
+        if (JAVASCRIPT != pMacro->GetScriptType() && !bOutStarBasic)
+            continue;
+
+        const char* pAttributeName
+            = (STARBASIC == pMacro->GetScriptType()) ? rEntry.pBasicName : rEntry.pJavaName;
+        if (!pAttributeName)
+            continue;
+
+        rHtmlWriter.attribute(pAttributeName, OUStringToOString(pMacro->GetMacName(), RTL_TEXTENCODING_UTF8));
+    }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/svtools/source/svhtml/htmlsupp.cxx b/svtools/source/svhtml/htmlsupp.cxx
new file mode 100644
index 000000000..a418d61eb
--- /dev/null
+++ b/svtools/source/svhtml/htmlsupp.cxx
@@ -0,0 +1,159 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <comphelper/string.hxx>
+#include <svtools/parhtml.hxx>
+#include <svtools/htmltokn.h>
+#include <svtools/htmlkywd.hxx>
+#include <tools/urlobj.hxx>
+
+// Maps the text of a script's LANGUAGE option to HTMLScriptLanguage values (consumed by ParseScriptOptions() below)
+HTMLOptionEnum<HTMLScriptLanguage> const aScriptLangOptEnums[] =
+{
+ { OOO_STRING_SVTOOLS_HTML_LG_starbasic, HTMLScriptLanguage::StarBasic },
+ { OOO_STRING_SVTOOLS_HTML_LG_javascript, HTMLScriptLanguage::JavaScript },
+ { OOO_STRING_SVTOOLS_HTML_LG_javascript11, HTMLScriptLanguage::JavaScript }, // alias, also reported as JavaScript
+ { OOO_STRING_SVTOOLS_HTML_LG_livescript, HTMLScriptLanguage::JavaScript }, // alias, also reported as JavaScript
+ { nullptr, HTMLScriptLanguage(0) } // end-of-table entry (null name)
+};
+
+void HTMLParser::ParseScriptOptions( OUString& rLangString, std::u16string_view rBaseURL,
+                                     HTMLScriptLanguage& rLang,
+                                     OUString& rSrc,
+                                     OUString& rLibrary,
+                                     OUString& rModule )
+{
+    const HTMLOptions& rOptions = GetOptions();
+
+    // Values reported for options that are not present at all; the language
+    // defaults to JavaScript.
+    rLangString.clear();
+    rLang = HTMLScriptLanguage::JavaScript;
+    rSrc.clear();
+    rLibrary.clear();
+    rModule.clear();
+
+    // Visit the options from last to first, so that for an option given
+    // more than once the first occurrence is the one that wins.
+    for( size_t nLeft = rOptions.size(); nLeft > 0; --nLeft )
+    {
+        const HTMLOption& rOption = rOptions[nLeft - 1];
+        const HtmlOptionId eId = rOption.GetToken();
+
+        if( eId == HtmlOptionId::LANGUAGE )
+        {
+            // Hand back the raw text and, separately, the language found via
+            // aScriptLangOptEnums (Unknown if GetEnum() reports failure).
+            rLangString = rOption.GetString();
+            HTMLScriptLanguage eParsed;
+            const bool bKnown = rOption.GetEnum( eParsed, aScriptLangOptEnums );
+            rLang = bKnown ? eParsed : HTMLScriptLanguage::Unknown;
+        }
+        else if( eId == HtmlOptionId::SRC )
+        {
+            // The source reference is made absolute relative to rBaseURL.
+            rSrc = INetURLObject::GetAbsURL( rBaseURL, rOption.GetString() );
+        }
+        else if( eId == HtmlOptionId::SDLIBRARY )
+            rLibrary = rOption.GetString();
+        else if( eId == HtmlOptionId::SDMODULE )
+            rModule = rOption.GetString();
+        // Every other option is ignored.
+    }
+}
+
+void HTMLParser::RemoveSGMLComment( OUString &rString ) // in place: trims blank/tab/CR/LF at both ends, then strips a leading "<!--" line and a trailing "-->"
+{
+ sal_Unicode c = 0;
+ sal_Int32 idx = 0;
+ while (idx < rString.getLength()) // find the first character that is not blank/tab/CR/LF
+ {
+ c = rString[idx];
+ if (!( c==' ' || c=='\t' || c=='\r' || c=='\n' ) )
+ break;
+ idx++;
+ }
+ if (idx)
+ rString = rString.copy( idx ); // drop the leading whitespace
+
+ idx = rString.getLength() - 1; // -1 for an empty string; both the loop and the copy below are then skipped
+ while (idx > 0)
+ // Index 0 needs no check: the loop above already removed all leading whitespace, so the
+ // character at index 0 (if there is one) is never whitespace.
+ {
+ c = rString[idx];
+ if (!( c==' ' || c=='\t' || c=='\r' || c=='\n' ) )
+ break;
+ idx--;
+ }
+ if (idx != rString.getLength() - 1)
+ rString = rString.copy( 0, idx + 1 ); // drop the trailing whitespace
+
+ // remove SGML comments
+ if( rString.startsWith( "<!--" ) )
+ {
+ // the whole first line, if a line end is found; otherwise only the "<!--" itself
+ sal_Int32 nPos = 4;
+ while( nPos < rString.getLength() ) // search the first CR or LF after the "<!--"
+ {
+ c = rString[nPos];
+ if (c == '\r' || c == '\n')
+ break;
+ ++nPos;
+ }
+ if( c == '\r' && nPos+1 < rString.getLength() &&
+ '\n' == rString[nPos+1] )
+ ++nPos; // CR LF pair: step onto the LF
+ else if( c != '\n' )
+ nPos = 3; // no LF and no CR LF (this includes a lone CR): only "<!--" is removed
+ ++nPos; // first character to keep
+ rString = rString.copy( nPos );
+ }
+
+ if( !rString.endsWith("-->") )
+ return;
+
+ rString = rString.copy( 0, rString.getLength()-3 ); // cut the "-->"
+ // "//" or "'", maybe preceding CR/LF
+ rString = comphelper::string::stripEnd(rString, ' ');
+ sal_Int32 nDel = 0, nLen = rString.getLength(); // nDel: number of characters to cut from the end
+ if( nLen >= 2 &&
+ rString.endsWith("//") )
+ {
+ nDel = 2;
+ }
+ else if( nLen && '\'' == rString[nLen-1] )
+ {
+ nDel = 1;
+ }
+ if( nDel && nLen >= nDel+1 )
+ {
+ c = rString[nLen-(nDel+1)]; // character directly in front of the "//" or "'"
+ if( '\r'==c || '\n'==c )
+ {
+ nDel++;
+ if( '\n'==c && nLen >= nDel+1 &&
+ '\r'==rString[nLen-(nDel+1)] )
+ nDel++; // the line end was CR LF: remove the CR as well
+ }
+ }
+ rString = rString.copy( 0, nLen-nDel );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
new file mode 100644
index 000000000..70d1da950
--- /dev/null
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -0,0 +1,2200 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <comphelper/string.hxx>
+#include <o3tl/safeint.hxx>
+#include <o3tl/string_view.hxx>
+#include <tools/stream.hxx>
+#include <tools/debug.hxx>
+#include <tools/color.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <rtl/character.hxx>
+#include <rtl/tencinfo.h>
+#include <sal/log.hxx>
+#include <tools/tenccvt.hxx>
+#include <tools/datetime.hxx>
+#include <unotools/datetime.hxx>
+#include <svl/inettype.hxx>
+#include <svl/lngmisc.hxx>
+#include <com/sun/star/beans/PropertyAttribute.hpp>
+#include <com/sun/star/document/XDocumentProperties.hpp>
+
+#include <svtools/parhtml.hxx>
+#include <svtools/htmltokn.h>
+#include <svtools/htmlkywd.hxx>
+
+#include <utility>
+
+using namespace ::com::sun::star;
+
+
+const sal_Int32 MAX_LEN( 1024 ); // size passed to the temporary scan buffers; also caps the tag name read in GetNextRawToken()
+
+const sal_Int32 MAX_ENTITY_LEN( 8 ); // maximum number of characters ScanText() reads for a named entity after '&'
+
+
+// Tables that map the text of an option value to the corresponding enum value
+
+// <INPUT TYPE=xxx>
+HTMLOptionEnum<HTMLInputType> const aInputTypeOptEnums[] =
+{
+ { OOO_STRING_SVTOOLS_HTML_IT_text, HTMLInputType::Text },
+ { OOO_STRING_SVTOOLS_HTML_IT_password, HTMLInputType::Password },
+ { OOO_STRING_SVTOOLS_HTML_IT_checkbox, HTMLInputType::Checkbox },
+ { OOO_STRING_SVTOOLS_HTML_IT_radio, HTMLInputType::Radio },
+ { OOO_STRING_SVTOOLS_HTML_IT_range, HTMLInputType::Range },
+ { OOO_STRING_SVTOOLS_HTML_IT_scribble, HTMLInputType::Scribble },
+ { OOO_STRING_SVTOOLS_HTML_IT_file, HTMLInputType::File },
+ { OOO_STRING_SVTOOLS_HTML_IT_hidden, HTMLInputType::Hidden },
+ { OOO_STRING_SVTOOLS_HTML_IT_submit, HTMLInputType::Submit },
+ { OOO_STRING_SVTOOLS_HTML_IT_image, HTMLInputType::Image },
+ { OOO_STRING_SVTOOLS_HTML_IT_reset, HTMLInputType::Reset },
+ { OOO_STRING_SVTOOLS_HTML_IT_button, HTMLInputType::Button },
+ { nullptr, HTMLInputType(0) } // end-of-table entry (null name)
+};
+
+// <TABLE FRAME=xxx>
+HTMLOptionEnum<HTMLTableFrame> const aTableFrameOptEnums[] =
+{
+ { OOO_STRING_SVTOOLS_HTML_TF_void, HTMLTableFrame::Void },
+ { OOO_STRING_SVTOOLS_HTML_TF_above, HTMLTableFrame::Above },
+ { OOO_STRING_SVTOOLS_HTML_TF_below, HTMLTableFrame::Below },
+ { OOO_STRING_SVTOOLS_HTML_TF_hsides, HTMLTableFrame::HSides },
+ { OOO_STRING_SVTOOLS_HTML_TF_lhs, HTMLTableFrame::LHS },
+ { OOO_STRING_SVTOOLS_HTML_TF_rhs, HTMLTableFrame::RHS },
+ { OOO_STRING_SVTOOLS_HTML_TF_vsides, HTMLTableFrame::VSides },
+ { OOO_STRING_SVTOOLS_HTML_TF_box, HTMLTableFrame::Box },
+ { OOO_STRING_SVTOOLS_HTML_TF_border, HTMLTableFrame::Box }, // TF_border maps to the same value as TF_box
+ { nullptr, HTMLTableFrame(0) } // end-of-table entry (null name)
+};
+
+// <TABLE RULES=xxx>
+HTMLOptionEnum<HTMLTableRules> const aTableRulesOptEnums[] =
+{
+ { OOO_STRING_SVTOOLS_HTML_TR_none, HTMLTableRules::NONE },
+ { OOO_STRING_SVTOOLS_HTML_TR_groups, HTMLTableRules::Groups },
+ { OOO_STRING_SVTOOLS_HTML_TR_rows, HTMLTableRules::Rows },
+ { OOO_STRING_SVTOOLS_HTML_TR_cols, HTMLTableRules::Cols },
+ { OOO_STRING_SVTOOLS_HTML_TR_all, HTMLTableRules::All },
+ { nullptr, HTMLTableRules(0) } // end-of-table entry (null name)
+};
+
+
+HTMLOption::HTMLOption( HtmlOptionId nTok, const OUString& rToken, // stores the option id together with the two strings rToken and rValue
+ const OUString& rValue )
+ : aValue(rValue)
+ , aToken(rToken)
+ , nToken( nTok )
+{
+ DBG_ASSERT( nToken>=HtmlOptionId::BOOL_START && nToken<HtmlOptionId::END,
+ "HTMLOption: unknown token" ); // debug-only check that the id lies in [BOOL_START, END)
+}
+
+sal_uInt32 HTMLOption::GetNumber() const
+{
+    DBG_ASSERT( nToken==HtmlOptionId::VALUE ||
+                (nToken>=HtmlOptionId::NUMBER_START && nToken<HtmlOptionId::NUMBER_END) ||
+                (nToken>=HtmlOptionId::CONTEXT_START && nToken<HtmlOptionId::CONTEXT_END),
+                "GetNumber: Option not numerical" );
+    const OUString aTrimmed(comphelper::string::stripStart(aValue, ' ')); // leading blanks are skipped
+    const sal_Int32 nParsed = aTrimmed.toInt32();
+    if( nParsed < 0 )
+        return 0; // negative values are reported as 0
+    return static_cast<sal_uInt32>(nParsed);
+}
+
+sal_Int32 HTMLOption::GetSNumber() const // signed counterpart of GetNumber(): negative values are returned unchanged
+{
+ DBG_ASSERT( (nToken>=HtmlOptionId::NUMBER_START && nToken<HtmlOptionId::NUMBER_END) ||
+ (nToken>=HtmlOptionId::CONTEXT_START && nToken<HtmlOptionId::CONTEXT_END),
+ "GetSNumber: Option not numerical" );
+ OUString aTmp(comphelper::string::stripStart(aValue, ' ')); // leading blanks are skipped before parsing
+ return aTmp.toInt32();
+}
+
+void HTMLOption::GetNumbers( std::vector<sal_uInt32> &rNumbers ) const
+{
+    rNumbers.clear();
+
+    // Deliberately simple scanner: every maximal run of the digits '0'..'9'
+    // in the value yields one number, anything else merely ends a run.
+    sal_uInt32 nCurrent = 0;
+    bool bHaveDigits = false;
+    const sal_Int32 nLength = aValue.getLength();
+    for( sal_Int32 nIdx = 0; nIdx < nLength; ++nIdx )
+    {
+        const sal_Unicode cCh = aValue[ nIdx ];
+        if( cCh >= '0' && cCh <= '9' )
+        {
+            nCurrent = nCurrent * 10 + (cCh - '0');
+            bHaveDigits = true;
+            continue;
+        }
+        if( !bHaveDigits )
+            continue;
+        // A non-digit closes the run collected so far.
+        rNumbers.push_back( nCurrent );
+        nCurrent = 0;
+        bHaveDigits = false;
+    }
+    // The value may end while a run is still open.
+    if( bHaveDigits )
+        rNumbers.push_back( nCurrent );
+}
+
+void HTMLOption::GetColor( Color& rColor ) const // parses the value as a color and stores its red, green and blue parts in rColor
+{
+ DBG_ASSERT( (nToken>=HtmlOptionId::COLOR_START && nToken<HtmlOptionId::COLOR_END) || nToken==HtmlOptionId::SIZE,
+ "GetColor: Option is not a color." );
+
+ OUString aTmp(aValue.toAsciiLowerCase());
+ sal_uInt32 nColor = SAL_MAX_UINT32; // SAL_MAX_UINT32 means "not determined yet"
+ if (!aTmp.isEmpty() && aTmp[0] != '#')
+ nColor = GetHTMLColor(aTmp); // values not starting with '#' are first handed to GetHTMLColor()
+
+ if( SAL_MAX_UINT32 == nColor )
+ {
+ nColor = 0;
+ sal_Int32 nPos = 0;
+ for (sal_uInt32 i=0; i<6; ++i) // always six hex digits; a value that is too short is padded with '0'
+ {
+ // Whatever Netscape does to get color values: for each digit at most two
+ // characters < '0' (e.g. a leading '#') are skipped, the third one read is used as is.
+ sal_Unicode c = nPos<aTmp.getLength() ? aTmp[ nPos++ ] : '0';
+ if( c < '0' )
+ {
+ c = nPos<aTmp.getLength() ? aTmp[nPos++] : '0';
+ if( c < '0' )
+ c = nPos<aTmp.getLength() ? aTmp[nPos++] : '0';
+ }
+ nColor *= 16;
+ if( c >= '0' && c <= '9' )
+ nColor += (c - '0');
+ else if( c >= 'a' && c <= 'f' ) // only lower case is needed, aTmp was lower-cased above
+ nColor += (c + 0xa - 'a');
+ } // any other character counts as the digit 0
+ }
+
+ rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000) >> 16) );
+ rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00) >> 8));
+ rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ff) );
+}
+
+HTMLInputType HTMLOption::GetInputType() const // looks the value up in aInputTypeOptEnums
+{
+ DBG_ASSERT( nToken==HtmlOptionId::TYPE, "GetInputType: Option not TYPE" );
+ return GetEnum( aInputTypeOptEnums, HTMLInputType::Text ); // NOTE(review): HTMLInputType::Text is presumably the result for unknown values - confirm against GetEnum()
+}
+
+HTMLTableFrame HTMLOption::GetTableFrame() const // looks the value up in aTableFrameOptEnums
+{
+ DBG_ASSERT( nToken==HtmlOptionId::FRAME, "GetTableFrame: Option not FRAME" );
+ return GetEnum( aTableFrameOptEnums ); // no explicit fallback is passed here, unlike in GetInputType()
+}
+
+HTMLTableRules HTMLOption::GetTableRules() const // looks the value up in aTableRulesOptEnums
+{
+ DBG_ASSERT( nToken==HtmlOptionId::RULES, "GetTableRules: Option not RULES" );
+ return GetEnum( aTableRulesOptEnums ); // no explicit fallback is passed here, unlike in GetInputType()
+}
+
+HTMLParser::HTMLParser( SvStream& rIn, bool bReadNewDoc ) : // sets up a parser reading from rIn; bReadNewDoc is stored in bNewDoc
+ SvParser<HtmlTokenId>( rIn ),
+ bNewDoc(bReadNewDoc),
+ bIsInHeader(true), // stays set until FilterToken() sees HEAD_OFF, BODY_ON or FRAMESET_ON
+ bReadListing(false),
+ bReadXMP(false),
+ bReadPRE(false),
+ bReadTextArea(false),
+ bReadScript(false),
+ bReadStyle(false),
+ bEndTokenFound(false),
+ bPre_IgnoreNewPara(false),
+ bReadNextChar(false),
+ bReadComment(false),
+ nPre_LinePos(0),
+ mnPendingOffToken(HtmlTokenId::NONE) // no synthesized <TOKEN>_OFF is waiting to be returned
+{
+ //#i76649, default to UTF-8 for HTML unless we know differently
+ SetSrcEncoding(RTL_TEXTENCODING_UTF8);
+}
+
+HTMLParser::~HTMLParser() // empty body: no explicit cleanup is done here
+{
+}
+
+void HTMLParser::SetNamespace(std::u16string_view rNamespace) // stores rNamespace followed by ':' in maNamespace
+{
+ // Convert namespace alias to a prefix; GetNextToken_() removes this prefix from tag names.
+ maNamespace = OUString::Concat(rNamespace) + ":";
+}
+
+namespace
+{
+    class RefGuard // scope guard: calls AddFirstRef() on the parser and ReleaseRef() again unless parsing is still pending
+    {
+    private:
+        HTMLParser& m_rParser;
+    public:
+        explicit RefGuard(HTMLParser& rParser) // explicit: a parser must not convert to a guard by accident
+            : m_rParser(rParser)
+        {
+            m_rParser.AddFirstRef();
+        }
+        RefGuard(const RefGuard&) = delete; // the destructor of a copy would call ReleaseRef() a second time
+        ~RefGuard()
+        {
+            if (m_rParser.GetStatus() != SvParserState::Pending)
+                m_rParser.ReleaseRef(); // Parser not needed anymore
+        }
+    };
+}
+
+SvParserState HTMLParser::CallParser() // entry point: resets the scan state, runs Continue() and returns the resulting parser state
+{
+ eState = SvParserState::Working;
+ nNextCh = GetNextChar(); // prime the one-character look-ahead
+ SaveState( HtmlTokenId::NONE );
+
+ nPre_LinePos = 0;
+ bPre_IgnoreNewPara = false;
+
+ RefGuard aRefGuard(*this); // holds a reference for the duration of the call (see RefGuard)
+
+ Continue( HtmlTokenId::NONE ); // NONE: Continue() fetches the first token itself
+
+ return eState;
+}
+
+void HTMLParser::Continue( HtmlTokenId nToken )
+{
+    // Start with the token handed in, or fetch one if the caller gave none;
+    // after each round the next token is read before the state is re-checked.
+    for( HtmlTokenId nCurrent = ( nToken == HtmlTokenId::NONE ) ? GetNextToken() : nToken;
+         IsParserWorking();
+         nCurrent = GetNextToken() )
+    {
+        // Record the unfiltered token via SaveState() before filtering it.
+        SaveState( nCurrent );
+
+        // The filter may turn the token into NONE, which is not passed on.
+        const HtmlTokenId nFiltered = FilterToken( nCurrent );
+        if( nFiltered != HtmlTokenId::NONE )
+            NextToken( nFiltered );
+        if( IsParserWorking() )
+            SaveState( HtmlTokenId::NONE ); // continue with new token
+    }
+}
+
+HtmlTokenId HTMLParser::FilterToken( HtmlTokenId nToken ) // updates the header/PRE/LISTING/XMP state for nToken; returns the token to pass on, or NONE to drop it
+{
+ switch( nToken )
+ {
+ case HtmlTokenId(EOF):
+ nToken = HtmlTokenId::NONE;
+ break; // don't pass
+
+ case HtmlTokenId::HEAD_OFF:
+ bIsInHeader = false;
+ break;
+
+ case HtmlTokenId::HEAD_ON:
+ bIsInHeader = true;
+ break;
+
+ case HtmlTokenId::BODY_ON: // the start of the body also ends the header
+ bIsInHeader = false;
+ break;
+
+ case HtmlTokenId::FRAMESET_ON: // so does the start of a frameset
+ bIsInHeader = false;
+ break;
+
+ case HtmlTokenId::BODY_OFF: // leave any preformatted mode that is still active
+ bReadPRE = bReadListing = bReadXMP = false;
+ break;
+
+ case HtmlTokenId::HTML_OFF:
+ nToken = HtmlTokenId::NONE;
+ bReadPRE = bReadListing = bReadXMP = false;
+ break; // HtmlTokenId::ON hasn't been passed either !
+
+ case HtmlTokenId::PREFORMTXT_ON:
+ StartPRE();
+ break;
+
+ case HtmlTokenId::PREFORMTXT_OFF:
+ FinishPRE();
+ break;
+
+ case HtmlTokenId::LISTING_ON:
+ StartListing();
+ break;
+
+ case HtmlTokenId::LISTING_OFF:
+ FinishListing();
+ break;
+
+ case HtmlTokenId::XMP_ON:
+ StartXMP();
+ break;
+
+ case HtmlTokenId::XMP_OFF:
+ FinishXMP();
+ break;
+
+ default: // all other tokens go through the filter of the active mode; PRE is checked first, then LISTING, then XMP
+ if( bReadPRE )
+ nToken = FilterPRE( nToken );
+ else if( bReadListing )
+ nToken = FilterListing( nToken );
+ else if( bReadXMP )
+ nToken = FilterXMP( nToken );
+
+ break;
+ }
+
+ return nToken;
+}
+
+namespace {
+
+constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; } // true unless c is below 32 or equal to 127 (DEL)
+
+}
+
+HtmlTokenId HTMLParser::ScanText( const sal_Unicode cBreak ) // collects text into aToken until a stop condition is hit; cBreak=='>' means the inside of a tag is being scanned
+{
+ OUStringBuffer sTmpBuffer( MAX_LEN );
+ bool bContinue = true;
+ bool bEqSignFound = false; // inside a tag: an unquoted '=' was the previous significant character
+ sal_uInt32 cQuote = 0U; // inside a tag: the quote character that is currently open, 0 if none
+
+ while( bContinue && IsParserWorking() )
+ {
+ bool bNextCh = true; // whether the next character has to be read at the end of this round
+ switch( nNextCh )
+ {
+ case '&':
+ bEqSignFound = false;
+ if( bReadXMP )
+ sTmpBuffer.append( '&' ); // inside <XMP> the '&' is kept as plain text
+ else
+ {
+ sal_uInt64 nStreamPos = rInput.Tell(); // position right behind the '&', used to rewind below
+ sal_uInt32 nLinePos = GetLinePos();
+
+ sal_uInt32 cChar = 0U;
+ if( '#' == (nNextCh = GetNextChar()) ) // numeric character reference
+ {
+ nNextCh = GetNextChar();
+ const bool bIsHex( 'x' == nNextCh ); // only a lower-case 'x' starts the hexadecimal form
+ const bool bIsDecOrHex( bIsHex || rtl::isAsciiDigit(nNextCh) );
+ if ( bIsDecOrHex )
+ {
+ if ( bIsHex )
+ {
+ nNextCh = GetNextChar();
+ while ( rtl::isAsciiHexDigit(nNextCh) )
+ {
+ cChar = cChar * 16U +
+ ( nNextCh <= '9'
+ ? sal_uInt32( nNextCh - '0' )
+ : ( nNextCh <= 'F'
+ ? sal_uInt32( nNextCh - 'A' + 10 )
+ : sal_uInt32( nNextCh - 'a' + 10 ) ) );
+ nNextCh = GetNextChar();
+ }
+ }
+ else
+ {
+ do
+ {
+ cChar = cChar * 10U + sal_uInt32( nNextCh - '0');
+ nNextCh = GetNextChar();
+ }
+ while( rtl::isAsciiDigit(nNextCh) );
+ }
+
+ if( RTL_TEXTENCODING_DONTKNOW != eSrcEnc &&
+ RTL_TEXTENCODING_UCS2 != eSrcEnc &&
+ RTL_TEXTENCODING_UTF8 != eSrcEnc &&
+ cChar < 256 )
+ {
+ const sal_uInt32 convertFlags =
+ RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT;
+
+ char cEncodedChar = static_cast<char>(cChar); // values below 256 are taken as one byte in the source encoding
+ cChar = OUString(&cEncodedChar, 1, eSrcEnc, convertFlags).toChar();
+ if( 0U == cChar )
+ {
+ // If the character could not be
+ // converted, because a conversion is not
+ // available, do no conversion at all.
+ cChar = cEncodedChar; // NOTE(review): if plain char is signed, bytes >= 0x80 sign-extend here and then end up as '?' below - confirm
+ }
+ }
+ }
+ else
+ nNextCh = 0U;
+
+ if (!rtl::isUnicodeCodePoint(cChar)
+ || (linguistic::IsControlChar(cChar)
+ && cChar != '\r' && cChar != '\n' && cChar != '\t'))
+ {
+ cChar = '?'; // invalid code points and control characters other than CR, LF and TAB are replaced
+ }
+ }
+ else if( rtl::isAsciiAlpha( nNextCh ) ) // named entity
+ {
+ OUStringBuffer sEntityBuffer( MAX_ENTITY_LEN );
+ sal_Int32 nPos = 0; // number of characters of the name read so far
+ do
+ {
+ sEntityBuffer.appendUtf32( nNextCh );
+ nPos++;
+ nNextCh = GetNextChar();
+ }
+ while( nPos < MAX_ENTITY_LEN && rtl::isAsciiAlphanumeric( nNextCh ) &&
+ !rInput.eof() );
+
+ if( IsParserWorking() && !rInput.eof() )
+ {
+ OUString sEntity(sEntityBuffer.getStr(), nPos);
+ cChar = GetHTMLCharName( sEntity );
+
+ // not found ( == 0 ): plain text
+ // or a character which is inserted as attribute
+ if( 0U == cChar && ';' != nNextCh )
+ {
+ DBG_ASSERT( rInput.Tell() - nStreamPos ==
+ static_cast<sal_uInt64>(nPos+1)*GetCharSize(),
+ "UTF-8 is failing here" );
+ for( sal_Int32 i = nPos-1; i>1; i-- ) // retry with shorter and shorter prefixes of the name
+ {
+ nNextCh = sEntityBuffer[i];
+ sEntityBuffer.setLength( i );
+ sEntity = OUString(sEntityBuffer.getStr(), i);
+ cChar = GetHTMLCharName( sEntity );
+ if( cChar )
+ {
+ rInput.SeekRel( -static_cast<sal_Int64>
+ (nPos-i)*GetCharSize() ); // step back over the characters that are not part of the name
+ nlLinePos -= sal_uInt32(nPos-i);
+ nPos = i;
+ ClearTxtConvContext();
+ break;
+ }
+ }
+ }
+
+ if( !cChar ) // unknown character?
+ {
+ // back in stream, insert '&'
+ // and restart with next character
+ sTmpBuffer.append( '&' );
+
+ DBG_ASSERT( rInput.Tell()-nStreamPos ==
+ static_cast<sal_uInt64>(nPos+1)*GetCharSize(),
+ "Wrong stream position" );
+ DBG_ASSERT( nlLinePos-nLinePos ==
+ static_cast<sal_uInt32>(nPos+1),
+ "Wrong line position" );
+ rInput.Seek( nStreamPos );
+ nlLinePos = nLinePos;
+ ClearTxtConvContext();
+ break;
+ }
+
+ assert(cChar != 0);
+
+ // 1 == Non Breaking Space
+ // 2 == SoftHyphen
+
+ if (cChar == 1 || cChar == 2)
+ {
+ if( '>' == cBreak )
+ {
+ // When reading the content of a tag we have
+ // to change it to ' ' or '-'
+ if( 1U == cChar )
+ cChar = ' ';
+ else //2U
+ cChar = '-';
+ }
+ else
+ {
+ // If not scanning a tag return token
+ aToken.append( sTmpBuffer );
+ sTmpBuffer.setLength(0);
+
+ if( !aToken.isEmpty() )
+ {
+ // restart with character
+ nNextCh = '&';
+ DBG_ASSERT( rInput.Tell()-nStreamPos ==
+ static_cast<sal_uInt64>(nPos+1)*GetCharSize(),
+ "Wrong stream position" );
+ DBG_ASSERT( nlLinePos-nLinePos ==
+ static_cast<sal_uInt32>(nPos+1),
+ "Wrong line position" );
+ rInput.Seek( nStreamPos );
+ nlLinePos = nLinePos;
+ ClearTxtConvContext();
+ return HtmlTokenId::TEXTTOKEN; // first hand out the text collected in front of the entity
+ }
+
+ // Hack: _GetNextChar shall not read the
+ // next character
+ if( ';' != nNextCh )
+ aToken.append( " " );
+ if( 1U == cChar )
+ return HtmlTokenId::NONBREAKSPACE;
+ else //2U
+ return HtmlTokenId::SOFTHYPH;
+ }
+ }
+ }
+ else
+ nNextCh = 0U;
+ }
+ // &{...};-JavaScript-Macros are not supported any longer.
+ else if( IsParserWorking() )
+ {
+ sTmpBuffer.append( '&' ); // a '&' that starts neither kind of reference is kept as text
+ bNextCh = false;
+ break;
+ }
+
+ bNextCh = (';' == nNextCh); // a terminating ';' is consumed, anything else is scanned again
+ if( cBreak=='>' && (cChar=='\\' || cChar=='\'' ||
+ cChar=='\"' || cChar==' ') )
+ {
+ // ' and " have to be escaped within tags to separate
+ // them from ' and " enclosing options.
+ // \ has to be escaped as well.
+ // Space is protected because it's not a delimiter between
+ // options.
+ sTmpBuffer.append( '\\' );
+ }
+ if( IsParserWorking() )
+ {
+ if( cChar )
+ sTmpBuffer.appendUtf32( cChar );
+ }
+ else if( SvParserState::Pending==eState && '>'!=cBreak )
+ {
+ // Restart with '&', the remainder is returned as
+ // text token.
+ if( !aToken.isEmpty() || !sTmpBuffer.isEmpty() )
+ {
+ // _GetNextChar() returns the previous text and
+ // during the next execution a new character is read.
+ // Thus we have to position in front of the '&'.
+ nNextCh = 0U;
+ rInput.Seek( nStreamPos - GetCharSize() );
+ nlLinePos = nLinePos-1;
+ ClearTxtConvContext();
+ bReadNextChar = true;
+ }
+ bNextCh = false;
+ }
+ }
+ break;
+ case '=':
+ if( '>'==cBreak && !cQuote )
+ bEqSignFound = true;
+ sTmpBuffer.appendUtf32( nNextCh );
+ break;
+
+ case '\\':
+ if( '>'==cBreak )
+ {
+ // mark within tags
+ sTmpBuffer.append( '\\' );
+ }
+ sTmpBuffer.append( '\\' );
+ break;
+
+ case '\"':
+ case '\'':
+ if( '>'==cBreak )
+ {
+ if( bEqSignFound )
+ cQuote = nNextCh; // a quote directly after '=' opens a quoted value
+ else if( cQuote && (cQuote==nNextCh ) )
+ cQuote = 0U; // the matching quote closes it again
+ }
+ sTmpBuffer.appendUtf32( nNextCh );
+ bEqSignFound = false;
+ break;
+
+ case sal_Unicode(EOF):
+ if( rInput.eof() )
+ {
+ bContinue = false;
+ }
+ // else: ignore, not a valid code point
+ break;
+
+ case '<':
+ bEqSignFound = false;
+ if( '>'==cBreak )
+ sTmpBuffer.appendUtf32( nNextCh );
+ else
+ bContinue = false; // break, string is together
+ break;
+
+ case '\f':
+ if( '>' == cBreak )
+ {
+ // If scanning options treat it like a space, ...
+ sTmpBuffer.append( ' ' );
+ }
+ else
+ {
+ // otherwise it's a separate token.
+ bContinue = false;
+ }
+ break;
+
+ case '\r':
+ case '\n':
+ if( '>'==cBreak )
+ {
+ // cr/lf in tag is handled in GetNextToken_()
+ sTmpBuffer.appendUtf32( nNextCh );
+ break;
+ }
+ else if( bReadListing || bReadXMP || bReadPRE || bReadTextArea )
+ {
+ bContinue = false; // in these modes a line end ends the text token
+ break;
+ }
+ // Reduce sequence of CR/LF/BLANK/TAB to a single blank
+ [[fallthrough]];
+ case '\t':
+ if( '\t'==nNextCh && bReadPRE && '>'!=cBreak )
+ {
+ // Pass Tabs up in <PRE>
+ bContinue = false;
+ break;
+ }
+ [[fallthrough]];
+ case '\x0b':
+ if( '\x0b'==nNextCh && (bReadPRE || bReadXMP ||bReadListing) &&
+ '>'!=cBreak )
+ {
+ break; // a vertical tab is dropped in PRE/XMP/LISTING text
+ }
+ nNextCh = ' ';
+ [[fallthrough]];
+ case ' ':
+ sTmpBuffer.appendUtf32( nNextCh );
+ if( '>'!=cBreak && (!bReadListing && !bReadXMP &&
+ !bReadPRE && !bReadTextArea) )
+ {
+ // Reduce sequences of Blanks/Tabs/CR/LF to a single blank
+ do {
+ nNextCh = GetNextChar();
+ if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
+ {
+ if( !aToken.isEmpty() || sTmpBuffer.getLength() > 1 )
+ {
+ // Have seen s.th. aside from blanks?
+ aToken.append( sTmpBuffer );
+ sTmpBuffer.setLength(0);
+ return HtmlTokenId::TEXTTOKEN;
+ }
+ else
+ // Only read blanks: no text must be returned
+ // and GetNextToken_ has to read until EOF
+ return HtmlTokenId::NONE;
+ }
+ } while ( ' ' == nNextCh || '\t' == nNextCh ||
+ '\r' == nNextCh || '\n' == nNextCh ||
+ '\x0b' == nNextCh );
+ bNextCh = false; // the loop has already read the first character behind the blanks
+ }
+ break;
+
+ default:
+ bEqSignFound = false;
+ if (nNextCh == cBreak && !cQuote)
+ bContinue = false; // unquoted break character: done
+ else
+ {
+ do {
+ if (!linguistic::IsControlChar(nNextCh))
+ {
+ // All remaining characters make their way into the text.
+ sTmpBuffer.appendUtf32( nNextCh );
+ }
+
+ nNextCh = GetNextChar();
+ if( ( sal_Unicode(EOF) == nNextCh && rInput.eof() ) ||
+ !IsParserWorking() )
+ {
+ if( !sTmpBuffer.isEmpty() )
+ aToken.append( sTmpBuffer );
+ return HtmlTokenId::TEXTTOKEN;
+ }
+ } while( rtl::isAsciiAlpha( nNextCh ) || rtl::isAsciiDigit( nNextCh ) ); // ASCII letters and digits are taken over in this inner loop
+ bNextCh = false;
+ }
+ }
+
+ if( bContinue && bNextCh )
+ nNextCh = GetNextChar();
+ }
+
+ if( !sTmpBuffer.isEmpty() )
+ aToken.append( sTmpBuffer );
+
+ return HtmlTokenId::TEXTTOKEN;
+}
+
+HtmlTokenId HTMLParser::GetNextRawToken() // reads one chunk of raw (script/style/aEndToken-delimited) data into aToken; returns RAWDATA, or NONE once the end is reached
+{
+ OUStringBuffer sTmpBuffer( MAX_LEN );
+
+ if( bEndTokenFound )
+ {
+ // During the last execution we already found the end token,
+ // thus we don't have to search it again.
+ bReadScript = false;
+ bReadStyle = false;
+ aEndToken.clear();
+ bEndTokenFound = false;
+
+ return HtmlTokenId::NONE;
+ }
+
+ // Default return value: HtmlTokenId::RAWDATA
+ bool bContinue = true;
+ HtmlTokenId nToken = HtmlTokenId::RAWDATA;
+ SaveState( HtmlTokenId::NONE );
+ while( bContinue && IsParserWorking() )
+ {
+ bool bNextCh = true; // whether the next character has to be read at the end of this round
+ switch( nNextCh )
+ {
+ case '<':
+ {
+ // Maybe we've reached the end.
+
+ // Save what we have read previously...
+ aToken.append( sTmpBuffer );
+ sTmpBuffer.setLength(0);
+
+ // and remember position in stream.
+ sal_uInt64 nStreamPos = rInput.Tell();
+ sal_uInt32 nLineNr = GetLineNr();
+ sal_uInt32 nLinePos = GetLinePos();
+
+ // Start of an end token?
+ bool bOffState = false;
+ if( '/' == (nNextCh = GetNextChar()) )
+ {
+ bOffState = true;
+ nNextCh = GetNextChar();
+ }
+ else if( '!' == nNextCh )
+ {
+ sTmpBuffer.appendUtf32( nNextCh ); // keep the '!' so that a comment start can be recognized below
+ nNextCh = GetNextChar();
+ }
+
+ // Read following letters
+ while( (rtl::isAsciiAlpha(nNextCh) || '-'==nNextCh) &&
+ IsParserWorking() && sTmpBuffer.getLength() < MAX_LEN )
+ {
+ sTmpBuffer.appendUtf32( nNextCh );
+ nNextCh = GetNextChar();
+ }
+
+ OUString aTok( sTmpBuffer.toString() );
+ aTok = aTok.toAsciiLowerCase(); // the comparisons below are done on the lower-cased name
+ bool bDone = false;
+ if( bReadScript || !aEndToken.isEmpty() )
+ {
+ if( !bReadComment )
+ {
+ if( aTok.startsWith( OOO_STRING_SVTOOLS_HTML_comment ) )
+ {
+ bReadComment = true;
+ }
+ else
+ {
+ // A script has to end with "</SCRIPT>". But
+ // ">" is optional for security reasons
+ bDone = bOffState &&
+ ( bReadScript
+ ? aTok == OOO_STRING_SVTOOLS_HTML_script
+ : aTok == aEndToken );
+ }
+ }
+ if( bReadComment && '>'==nNextCh && aTok.endsWith( "--" ) )
+ {
+ // End of comment of style <!----->
+ bReadComment = false;
+ }
+ }
+ else
+ {
+ // Style sheets can be closed by </STYLE>, </HEAD> or <BODY>
+ if( bOffState )
+ bDone = aTok == OOO_STRING_SVTOOLS_HTML_style ||
+ aTok == OOO_STRING_SVTOOLS_HTML_head;
+ else
+ bDone = aTok == OOO_STRING_SVTOOLS_HTML_body;
+ }
+
+ if( bDone )
+ {
+ // Done! Return the previously read string (if requested)
+ // and continue.
+
+ bContinue = false;
+
+ // nToken==0 means, GetNextToken_ continues to read
+ if( aToken.isEmpty() && (bReadStyle || bReadScript) )
+ {
+ // Immediately close environment (or context?)
+ // and parse the end token
+ bReadScript = false;
+ bReadStyle = false;
+ aEndToken.clear();
+ nToken = HtmlTokenId::NONE;
+ }
+ else
+ {
+ // Keep bReadScript/bReadStyle alive
+ // and parse end token during next execution
+ bEndTokenFound = true;
+ }
+
+ // Move backwards in stream to '<'
+ rInput.Seek( nStreamPos );
+ SetLineNr( nLineNr );
+ SetLinePos( nLinePos );
+ ClearTxtConvContext();
+ nNextCh = '<';
+
+ // Don't append string to token.
+ sTmpBuffer.setLength( 0 );
+ }
+ else
+ {
+ // remember "</" , everything else we find in the buffer
+ aToken.append( "<" );
+ if( bOffState )
+ aToken.append( "/" );
+
+ bNextCh = false; // nNextCh already holds the first character behind the name
+ }
+ }
+ break;
+ case '-':
+ sTmpBuffer.appendUtf32( nNextCh );
+ if( bReadComment )
+ {
+ bool bTwoMinus = false; // set once a second consecutive '-' has been read
+ nNextCh = GetNextChar();
+ while( '-' == nNextCh && IsParserWorking() )
+ {
+ bTwoMinus = true;
+ sTmpBuffer.appendUtf32( nNextCh );
+ nNextCh = GetNextChar();
+ }
+
+ if( '>' == nNextCh && IsParserWorking() && bTwoMinus )
+ bReadComment = false; // "-->" ends the comment
+
+ bNextCh = false;
+ }
+ break;
+
+ case '\r':
+ // \r\n? closes the current text token (even if it's empty)
+ nNextCh = GetNextChar();
+ if( nNextCh=='\n' )
+ nNextCh = GetNextChar();
+ bContinue = false;
+ break;
+ case '\n':
+ // \n closes the current text token (even if it's empty)
+ nNextCh = GetNextChar();
+ bContinue = false;
+ break;
+ case sal_Unicode(EOF):
+ // eof closes the current text token and behaves like having read
+ // an end token
+ if( rInput.eof() )
+ {
+ bContinue = false;
+ if( !aToken.isEmpty() || !sTmpBuffer.isEmpty() )
+ {
+ bEndTokenFound = true; // return the pending text now, NONE on the next call
+ }
+ else
+ {
+ bReadScript = false;
+ bReadStyle = false;
+ aEndToken.clear();
+ nToken = HtmlTokenId::NONE;
+ }
+ }
+ break;
+ default:
+ if (!linguistic::IsControlChar(nNextCh) || nNextCh == '\t')
+ {
+ // all remaining characters are appended to the buffer
+ sTmpBuffer.appendUtf32( nNextCh );
+ }
+ break;
+ }
+
+ if( !bContinue && !sTmpBuffer.isEmpty() )
+ {
+ aToken.append( sTmpBuffer );
+ sTmpBuffer.setLength(0);
+ }
+
+ if( bContinue && bNextCh )
+ nNextCh = GetNextChar();
+ }
+
+ if( IsParserWorking() )
+ SaveState( HtmlTokenId::NONE );
+ else
+ nToken = HtmlTokenId::NONE; // no token is reported once the parser has stopped working
+
+ return nToken;
+}
+
+// Scan next token
+HtmlTokenId HTMLParser::GetNextToken_()
+{
+ HtmlTokenId nRet = HtmlTokenId::NONE;
+ sSaveToken.clear();
+
+ if (mnPendingOffToken != HtmlTokenId::NONE)
+ {
+ // HtmlTokenId::<TOKEN>_OFF generated for HtmlTokenId::<TOKEN>_ON
+ nRet = mnPendingOffToken;
+ mnPendingOffToken = HtmlTokenId::NONE;
+ aToken.setLength( 0 );
+ return nRet;
+ }
+
+ // Delete options
+ maOptions.clear();
+
+ if( !IsParserWorking() ) // Don't continue if already an error occurred
+ return HtmlTokenId::NONE;
+
+ bool bReadNextCharSave = bReadNextChar;
+ if( bReadNextChar )
+ {
+ DBG_ASSERT( !bEndTokenFound,
+ "Read a character despite </SCRIPT> was read?" );
+ nNextCh = GetNextChar();
+ if( !IsParserWorking() ) // Don't continue if already an error occurred
+ return HtmlTokenId::NONE;
+ bReadNextChar = false;
+ }
+
+ if( bReadScript || bReadStyle || !aEndToken.isEmpty() )
+ {
+ nRet = GetNextRawToken();
+ if( nRet != HtmlTokenId::NONE || !IsParserWorking() )
+ return nRet;
+ }
+
+ do {
+ bool bNextCh = true;
+ switch( nNextCh )
+ {
+ case '<':
+ {
+ sal_uInt64 nStreamPos = rInput.Tell();
+ sal_uInt32 nLineNr = GetLineNr();
+ sal_uInt32 nLinePos = GetLinePos();
+
+ bool bOffState = false;
+ if( '/' == (nNextCh = GetNextChar()) )
+ {
+ bOffState = true;
+ nNextCh = GetNextChar();
+ }
+ // Assume '<?' is a start of an XML declaration, ignore it.
+ if (rtl::isAsciiAlpha(nNextCh) || nNextCh == '!' || nNextCh == '?')
+ {
+ OUStringBuffer sTmpBuffer;
+ do {
+ sTmpBuffer.appendUtf32( nNextCh );
+ nNextCh = GetNextChar();
+ if (std::u16string_view(sTmpBuffer) == u"![CDATA[")
+ {
+ break;
+ }
+ } while( '>' != nNextCh && '/' != nNextCh && !rtl::isAsciiWhiteSpace( nNextCh ) &&
+ !linguistic::IsControlChar(nNextCh) &&
+ IsParserWorking() && !rInput.eof() );
+
+ if( !sTmpBuffer.isEmpty() )
+ {
+ aToken.append( sTmpBuffer );
+ sTmpBuffer.setLength(0);
+ }
+
+ // Skip blanks
+ while( rtl::isAsciiWhiteSpace( nNextCh ) && IsParserWorking() )
+ nNextCh = GetNextChar();
+
+ if( !IsParserWorking() )
+ {
+ if( SvParserState::Pending == eState )
+ bReadNextChar = bReadNextCharSave;
+ break;
+ }
+
+ // Search token in table:
+ sSaveToken = aToken;
+ aToken = aToken.toString().toAsciiLowerCase();
+
+ if (!maNamespace.isEmpty() && o3tl::starts_with(aToken, maNamespace))
+ aToken.remove( 0, maNamespace.getLength());
+
+ if( HtmlTokenId::NONE == (nRet = GetHTMLToken( aToken )) )
+ // Unknown control
+ nRet = HtmlTokenId::UNKNOWNCONTROL_ON;
+
+ // If it's a token which can be switched off...
+ if( bOffState )
+ {
+ if( nRet >= HtmlTokenId::ONOFF_START )
+ {
+ // and there is an off token, return off token instead
+ nRet = static_cast<HtmlTokenId>(static_cast<int>(nRet) + 1);
+ }
+ else if( HtmlTokenId::LINEBREAK!=nRet || !maNamespace.isEmpty())
+ {
+ // and there is no off token, return unknown token.
+ // (except for </BR>, that is treated like <BR>)
+ // No exception for XHTML, though.
+ nRet = HtmlTokenId::UNKNOWNCONTROL_OFF;
+ }
+ }
+
+ if( nRet == HtmlTokenId::COMMENT )
+ {
+ // fix: due to being case sensitive use sSaveToken as start of comment
+ // and append a blank.
+ aToken = sSaveToken;
+ if( '>'!=nNextCh )
+ aToken.append( " " );
+ sal_uInt64 nCStreamPos = 0;
+ sal_uInt32 nCLineNr = 0;
+ sal_uInt32 nCLinePos = 0;
+ sal_Int32 nCStrLen = 0;
+
+ bool bDone = false;
+ // Read until closing -->. If not found restart at first >
+ sTmpBuffer = aToken;
+ while( !bDone && !rInput.eof() && IsParserWorking() )
+ {
+ if( '>'==nNextCh )
+ {
+ if( !nCStreamPos )
+ {
+ nCStreamPos = rInput.Tell();
+ nCStrLen = sTmpBuffer.getLength();
+ nCLineNr = GetLineNr();
+ nCLinePos = GetLinePos();
+ }
+ bDone = sTmpBuffer.getLength() >= 2 && sTmpBuffer[sTmpBuffer.getLength() - 2] == '-' && sTmpBuffer[sTmpBuffer.getLength() - 1] == '-';
+ if( !bDone )
+ sTmpBuffer.appendUtf32(nNextCh);
+ }
+ else if (!linguistic::IsControlChar(nNextCh)
+ || nNextCh == '\r' || nNextCh == '\n' || nNextCh == '\t')
+ {
+ sTmpBuffer.appendUtf32(nNextCh);
+ }
+ if( !bDone )
+ nNextCh = GetNextChar();
+ }
+ aToken = sTmpBuffer.makeStringAndClear();
+ if( !bDone && IsParserWorking() && nCStreamPos )
+ {
+ rInput.Seek( nCStreamPos );
+ SetLineNr( nCLineNr );
+ SetLinePos( nCLinePos );
+ ClearTxtConvContext();
+ aToken.truncate(nCStrLen);
+ nNextCh = '>';
+ }
+ }
+ else if (nRet == HtmlTokenId::CDATA)
+ {
+ // Read until the closing ]]>.
+ bool bDone = false;
+ while (!bDone && !rInput.eof() && IsParserWorking())
+ {
+ if (nNextCh == '>')
+ {
+ if (sTmpBuffer.getLength() >= 2)
+ {
+ bDone = sTmpBuffer[sTmpBuffer.getLength() - 2] == ']'
+ && sTmpBuffer[sTmpBuffer.getLength() - 1] == ']';
+ if (bDone)
+ {
+ // Ignore ]] at the end.
+ sTmpBuffer.setLength(sTmpBuffer.getLength() - 2);
+ }
+ }
+ if (!bDone)
+ {
+ sTmpBuffer.appendUtf32(nNextCh);
+ }
+ }
+ else if (!linguistic::IsControlChar(nNextCh))
+ {
+ sTmpBuffer.appendUtf32(nNextCh);
+ }
+ if (!bDone)
+ {
+ nNextCh = GetNextChar();
+ }
+ }
+ aToken = sTmpBuffer;
+ sTmpBuffer.setLength(0);
+ }
+ else
+ {
+ // TokenString not needed anymore
+ aToken.setLength( 0 );
+ }
+
+ // Read until closing '>'
+ if( '>' != nNextCh && IsParserWorking() )
+ {
+ ScanText( '>' );
+
+ // fdo#34666 fdo#36080 fdo#36390: closing "/>"?:
+ // generate pending HtmlTokenId::<TOKEN>_OFF for HtmlTokenId::<TOKEN>_ON
+ // Do not convert this to a single HtmlTokenId::<TOKEN>_OFF
+ // which lead to fdo#56772.
+ if ((nRet >= HtmlTokenId::ONOFF_START) && o3tl::ends_with(aToken, u"/"))
+ {
+ mnPendingOffToken = static_cast<HtmlTokenId>(static_cast<int>(nRet) + 1); // HtmlTokenId::<TOKEN>_ON -> HtmlTokenId::<TOKEN>_OFF
+ aToken.setLength( aToken.getLength()-1 ); // remove trailing '/'
+ }
+ if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
+ {
+ // Move back in front of < and restart there.
+ // Return < as text.
+ rInput.Seek( nStreamPos );
+ SetLineNr( nLineNr );
+ SetLinePos( nLinePos );
+ ClearTxtConvContext();
+
+ aToken = "<";
+ nRet = HtmlTokenId::TEXTTOKEN;
+ nNextCh = GetNextChar();
+ bNextCh = false;
+ break;
+ }
+ }
+ if( SvParserState::Pending == eState )
+ bReadNextChar = bReadNextCharSave;
+ }
+ else
+ {
+ if( bOffState )
+ {
+ // simply throw away everything
+ ScanText( '>' );
+ if( sal_Unicode(EOF) == nNextCh && rInput.eof() )
+ {
+ // Move back in front of < and restart there.
+ // Return < as text.
+ rInput.Seek( nStreamPos );
+ SetLineNr( nLineNr );
+ SetLinePos( nLinePos );
+ ClearTxtConvContext();
+
+ aToken = "<";
+ nRet = HtmlTokenId::TEXTTOKEN;
+ nNextCh = GetNextChar();
+ bNextCh = false;
+ break;
+ }
+ if( SvParserState::Pending == eState )
+ bReadNextChar = bReadNextCharSave;
+ aToken.setLength( 0 );
+ }
+ else if( '%' == nNextCh )
+ {
+ nRet = HtmlTokenId::UNKNOWNCONTROL_ON;
+
+ sal_uInt64 nCStreamPos = rInput.Tell();
+ sal_uInt32 nCLineNr = GetLineNr(), nCLinePos = GetLinePos();
+
+ bool bDone = false;
+ // Read until closing %>. If not found restart at first >.
+ sal_Unicode nLastTokenChar = !aToken.isEmpty() ? aToken[aToken.getLength() - 1] : 0;
+ OUStringBuffer aTmpBuffer(aToken);
+ while( !bDone && !rInput.eof() && IsParserWorking() )
+ {
+ bDone = '>'==nNextCh && nLastTokenChar == '%';
+ if( !bDone )
+ {
+ aTmpBuffer.appendUtf32(nNextCh);
+ nLastTokenChar = aTmpBuffer[aTmpBuffer.getLength() - 1];
+ nNextCh = GetNextChar();
+ }
+ }
+ if( !bDone && IsParserWorking() )
+ {
+ rInput.Seek( nCStreamPos );
+ SetLineNr( nCLineNr );
+ SetLinePos( nCLinePos );
+ ClearTxtConvContext();
+ aToken = "<%";
+ nRet = HtmlTokenId::TEXTTOKEN;
+ break;
+ }
+ aToken = aTmpBuffer.makeStringAndClear();
+ if( IsParserWorking() )
+ {
+ sSaveToken = aToken;
+ aToken.setLength( 0 );
+ }
+ }
+ else
+ {
+ aToken = "<";
+ nRet = HtmlTokenId::TEXTTOKEN;
+ bNextCh = false;
+ break;
+ }
+ }
+
+ if( IsParserWorking() )
+ {
+ bNextCh = '>' == nNextCh;
+ switch( nRet )
+ {
+ case HtmlTokenId::TEXTAREA_ON:
+ bReadTextArea = true;
+ break;
+ case HtmlTokenId::TEXTAREA_OFF:
+ bReadTextArea = false;
+ break;
+ case HtmlTokenId::SCRIPT_ON:
+ if( !bReadTextArea )
+ bReadScript = true;
+ break;
+ case HtmlTokenId::SCRIPT_OFF:
+ if( !bReadTextArea )
+ {
+ bReadScript = false;
+ // JavaScript might modify the stream,
+ // thus the last character has to be read again.
+ bReadNextChar = true;
+ bNextCh = false;
+ }
+ break;
+
+ case HtmlTokenId::STYLE_ON:
+ bReadStyle = true;
+ break;
+ case HtmlTokenId::STYLE_OFF:
+ bReadStyle = false;
+ break;
+ default: break;
+ }
+ }
+ }
+ break;
+
+ case sal_Unicode(EOF):
+ if( rInput.eof() )
+ {
+ eState = SvParserState::Accepted;
+ nRet = HtmlTokenId(nNextCh);
+ }
+ else
+ {
+ // Read normal text.
+ goto scan_text;
+ }
+ break;
+
+ case '\f':
+ // form feeds are passed upwards separately
+ nRet = HtmlTokenId::LINEFEEDCHAR; // !!! should be FORMFEEDCHAR
+ break;
+
+ case '\n':
+ case '\r':
+ if( bReadListing || bReadXMP || bReadPRE || bReadTextArea )
+ {
+ sal_Unicode c = GetNextChar();
+ if( ( '\n' != nNextCh || '\r' != c ) &&
+ ( '\r' != nNextCh || '\n' != c ) )
+ {
+ bNextCh = false;
+ nNextCh = c;
+ }
+ nRet = HtmlTokenId::NEWPARA;
+ break;
+ }
+ [[fallthrough]];
+ case '\t':
+ if( bReadPRE )
+ {
+ nRet = HtmlTokenId::TABCHAR;
+ break;
+ }
+ [[fallthrough]];
+ case ' ':
+ [[fallthrough]];
+ default:
+
+scan_text:
+ // "normal" text to come
+ nRet = ScanText();
+ bNextCh = 0 == aToken.getLength();
+
+ // the text should be processed
+ if( !bNextCh && eState == SvParserState::Pending )
+ {
+ eState = SvParserState::Working;
+ bReadNextChar = true;
+ }
+
+ break;
+ }
+
+ if( bNextCh && SvParserState::Working == eState )
+ {
+ nNextCh = GetNextChar();
+ if( SvParserState::Pending == eState && nRet != HtmlTokenId::NONE && HtmlTokenId::TEXTTOKEN != nRet )
+ {
+ bReadNextChar = true;
+ eState = SvParserState::Working;
+ }
+ }
+
+ } while( nRet == HtmlTokenId::NONE && SvParserState::Working == eState );
+
+ if( SvParserState::Pending == eState )
+ nRet = HtmlTokenId::INVALID; // s.th. invalid
+
+ return nRet;
+}
+
+/// Removes escaping backslashes from aToken in place.
+///
+/// A backslash is dropped unless it directly follows a backslash that was
+/// itself just dropped, so "\\" collapses to a single backslash and "\x"
+/// becomes "x".
+void HTMLParser::UnescapeToken()
+{
+    // True while the character at the current index was protected by a
+    // backslash removed in the previous iteration.
+    bool bPrevWasEscape = false;
+
+    for( sal_Int32 nIdx = 0; nIdx < aToken.getLength(); )
+    {
+        const bool bIsEscapeChar = !bPrevWasEscape && '\\' == aToken[nIdx];
+        if( bIsEscapeChar )
+            aToken.remove( nIdx, 1 );   // the next character slides into nIdx
+        else
+            ++nIdx;
+        bPrevWasEscape = bIsEscapeChar;
+    }
+}
+
+const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken )
+{ // Splits the option text held in aToken into name/value entries, stores them in maOptions and returns that list.
+ // If the options for the current token have already been returned,
+ // return them once again.
+ if (!maOptions.empty())
+ return maOptions; // nothing is re-parsed while maOptions is non-empty
+
+ sal_Int32 nPos = 0;
+ while( nPos < aToken.getLength() )
+ {
+ // A letter? Option beginning here.
+ if( rtl::isAsciiAlpha( aToken[nPos] ) )
+ {
+ HtmlOptionId nToken;
+ OUString aValue; // stays empty for options without a value
+ sal_Int32 nStt = nPos; // start of the option name
+ sal_Unicode cChar = 0;
+
+ // Actually only certain characters allowed.
+ // Netscape only looks for "=" and white space (c.f.
+ // Mozilla: PA_FetchRequestedNameValues in libparse/pa_mdl.c)
+ while( nPos < aToken.getLength() )
+ {
+ cChar = aToken[nPos];
+ if ( '=' == cChar ||!HTML_ISPRINTABLE(cChar) || rtl::isAsciiWhiteSpace(cChar) )
+ break; // the name ends at '=', white space or a non-printable character
+ nPos++;
+ }
+
+ OUString sName( aToken.subView( nStt, nPos-nStt ) );
+
+ // PlugIns require original token name. Convert to lower case only for searching.
+ nToken = GetHTMLOption( sName.toAsciiLowerCase() ); // Name is ready
+ SAL_WARN_IF( nToken==HtmlOptionId::UNKNOWN, "svtools",
+ "GetOption: unknown HTML option '" << sName << "'" );
+ bool bStripCRLF = (nToken < HtmlOptionId::SCRIPT_START || // line ends are kept for ids in [SCRIPT_START, SCRIPT_END) ...
+ nToken >= HtmlOptionId::SCRIPT_END) &&
+ (!pNoConvertToken || nToken != *pNoConvertToken); // ... and for the option the caller passed in pNoConvertToken
+
+ while( nPos < aToken.getLength() ) // skip white space and non-printable characters after the name
+ {
+ cChar = aToken[nPos];
+ if ( HTML_ISPRINTABLE(cChar) && !rtl::isAsciiWhiteSpace(cChar) )
+ break;
+ nPos++;
+ }
+
+ // Option with value?
+ if( nPos!=aToken.getLength() && '='==cChar ) // cChar is the character the skip loop above stopped on
+ {
+ nPos++;
+
+ while( nPos < aToken.getLength() ) // skip blanks, tabs, line ends and non-printable characters after '='
+ {
+ cChar = aToken[nPos];
+ if ( HTML_ISPRINTABLE(cChar) && ' ' != cChar && '\t' != cChar && '\r' != cChar && '\n' != cChar )
+ break;
+ nPos++;
+ }
+
+ if( nPos != aToken.getLength() )
+ {
+ sal_Int32 nLen = 0; // number of characters that end up in the value
+ nStt = nPos; // from here on nStt marks the start of the value
+ if( ('"'==cChar) || '\''==cChar ) // quoted value
+ {
+ sal_Unicode cEnd = cChar; // the value ends at the same kind of quote that opened it
+ nPos++; nStt++; // step over the opening quote
+ bool bDone = false;
+ bool bEscape = false;
+ while( nPos < aToken.getLength() && !bDone )
+ {
+ bool bOldEscape = bEscape; // was the previous character an escaping backslash?
+ bEscape = false;
+ cChar = aToken[nPos];
+ switch( cChar )
+ {
+ case '\r':
+ case '\n':
+ if( bStripCRLF )
+ aToken.remove( nPos, 1 ); // the line end is deleted from aToken itself; nPos and nLen stay put
+ else
+ {
+ nPos++;
+ nLen++;
+ }
+ break;
+ case '\\':
+ if( bOldEscape ) // escaped backslash: keep it
+ {
+ nPos++;
+ nLen++;
+ }
+ else
+ {
+ aToken.remove( nPos, 1 ); // drop the escaping backslash from aToken
+ bEscape = true;
+ }
+ break;
+ case '"':
+ case '\'':
+ bDone = !bOldEscape && cChar==cEnd; // only an unescaped quote of the opening kind ends the value
+ if( !bDone )
+ {
+ nPos++;
+ nLen++;
+ }
+ break;
+ default:
+ nPos++;
+ nLen++;
+ break;
+ }
+ }
+ if( nPos!=aToken.getLength() )
+ nPos++; // step over the closing quote; skipped when the token ended first
+ }
+ else
+ {
+ // More liberal than the standard: allow all printable characters
+ bool bEscape = false;
+ bool bDone = false;
+ while( nPos < aToken.getLength() && !bDone )
+ {
+ bool bOldEscape = bEscape;
+ bEscape = false;
+ sal_Unicode c = aToken[nPos];
+ switch( c )
+ {
+ case ' ':
+ bDone = !bOldEscape; // an unescaped blank ends the value
+ if( !bDone )
+ {
+ nPos++;
+ nLen++;
+ }
+ break;
+
+ case '\t':
+ case '\r':
+ case '\n':
+ bDone = true; // tab and line ends end an unquoted value even after a backslash
+ break;
+
+ case '\\':
+ if( bOldEscape )
+ {
+ nPos++;
+ nLen++;
+ }
+ else
+ {
+ aToken.remove( nPos, 1 ); // drop the escaping backslash from aToken
+ bEscape = true;
+ }
+ break;
+
+ default:
+ if( HTML_ISPRINTABLE( c ) )
+ {
+ nPos++;
+ nLen++;
+ }
+ else
+ bDone = true; // a non-printable character ends the value
+ break;
+ }
+ }
+ }
+
+ if( nLen )
+ aValue = aToken.subView( nStt, nLen ); // removed characters are already gone from aToken, so this range is the cleaned value
+ }
+ }
+
+ // The option is stored in every case, also when nToken is HtmlOptionId::UNKNOWN; sName keeps its original spelling.
+ maOptions.emplace_back(nToken, sName, aValue);
+
+ }
+ else
+ // Ignore white space and unexpected characters
+ nPos++;
+ }
+
+ return maOptions;
+}
+
+/// Filters a token that was read inside a <PRE> section.
+///
+/// Line-breaking tokens reset the column counter (and are swallowed while
+/// bPre_IgnoreNewPara is set), a tab is expanded to blanks up to the next
+/// multiple of eight columns, the tokens on the list below pass unchanged,
+/// and everything else is turned into an "unknown control" on/off token.
+/// bPre_IgnoreNewPara is always cleared before returning.
+HtmlTokenId HTMLParser::FilterPRE( HtmlTokenId nToken )
+{
+    switch( nToken )
+    {
+    // in Netscape they only have impact in not empty paragraphs
+    case HtmlTokenId::PARABREAK_ON:
+        nToken = HtmlTokenId::LINEBREAK;
+        [[fallthrough]];
+    case HtmlTokenId::LINEBREAK:
+    case HtmlTokenId::NEWPARA:
+        // a new line starts at column 0
+        nPre_LinePos = 0;
+        if( bPre_IgnoreNewPara )
+            nToken = HtmlTokenId::NONE;
+        break;
+
+    case HtmlTokenId::TABCHAR:
+        {
+            // distance to the next tab stop (tab stops every 8 columns)
+            const sal_Int32 nFill = 8 - (nPre_LinePos % 8);
+            DBG_ASSERT( aToken.isEmpty(), "Why is the token not empty?" );
+            if (aToken.getLength() < nFill)
+            {
+                OUStringBuffer aPadded(aToken);
+                aToken = comphelper::string::padToLength(aPadded, nFill, ' ').makeStringAndClear();
+            }
+            nPre_LinePos += nFill;
+            // the tab is handed on as plain text
+            nToken = HtmlTokenId::TEXTTOKEN;
+        }
+        break;
+
+    // text is kept and advances the column counter
+    case HtmlTokenId::TEXTTOKEN:
+        nPre_LinePos += aToken.getLength();
+        break;
+
+    // ---- tokens that pass through unchanged ----
+
+    // document structure and form controls
+    case HtmlTokenId::SELECT_ON:
+    case HtmlTokenId::SELECT_OFF:
+    case HtmlTokenId::BODY_ON:
+    case HtmlTokenId::FORM_ON:
+    case HtmlTokenId::FORM_OFF:
+    case HtmlTokenId::INPUT:
+    case HtmlTokenId::OPTION:
+    case HtmlTokenId::TEXTAREA_ON:
+    case HtmlTokenId::TEXTAREA_OFF:
+
+    // embedded objects
+    case HtmlTokenId::IMAGE:
+    case HtmlTokenId::APPLET_ON:
+    case HtmlTokenId::APPLET_OFF:
+    case HtmlTokenId::PARAM:
+    case HtmlTokenId::EMBED:
+
+    // headings and other block elements
+    case HtmlTokenId::HEAD1_ON:
+    case HtmlTokenId::HEAD1_OFF:
+    case HtmlTokenId::HEAD2_ON:
+    case HtmlTokenId::HEAD2_OFF:
+    case HtmlTokenId::HEAD3_ON:
+    case HtmlTokenId::HEAD3_OFF:
+    case HtmlTokenId::HEAD4_ON:
+    case HtmlTokenId::HEAD4_OFF:
+    case HtmlTokenId::HEAD5_ON:
+    case HtmlTokenId::HEAD5_OFF:
+    case HtmlTokenId::HEAD6_ON:
+    case HtmlTokenId::HEAD6_OFF:
+    case HtmlTokenId::BLOCKQUOTE_ON:
+    case HtmlTokenId::BLOCKQUOTE_OFF:
+    case HtmlTokenId::ADDRESS_ON:
+    case HtmlTokenId::ADDRESS_OFF:
+    case HtmlTokenId::HORZRULE:
+
+    case HtmlTokenId::CENTER_ON:
+    case HtmlTokenId::CENTER_OFF:
+    case HtmlTokenId::DIVISION_ON:
+    case HtmlTokenId::DIVISION_OFF:
+
+    // scripts
+    case HtmlTokenId::SCRIPT_ON:
+    case HtmlTokenId::SCRIPT_OFF:
+    case HtmlTokenId::RAWDATA:
+
+    // tables
+    case HtmlTokenId::TABLE_ON:
+    case HtmlTokenId::TABLE_OFF:
+    case HtmlTokenId::CAPTION_ON:
+    case HtmlTokenId::CAPTION_OFF:
+    case HtmlTokenId::COLGROUP_ON:
+    case HtmlTokenId::COLGROUP_OFF:
+    case HtmlTokenId::COL_ON:
+    case HtmlTokenId::COL_OFF:
+    case HtmlTokenId::THEAD_ON:
+    case HtmlTokenId::THEAD_OFF:
+    case HtmlTokenId::TFOOT_ON:
+    case HtmlTokenId::TFOOT_OFF:
+    case HtmlTokenId::TBODY_ON:
+    case HtmlTokenId::TBODY_OFF:
+    case HtmlTokenId::TABLEROW_ON:
+    case HtmlTokenId::TABLEROW_OFF:
+    case HtmlTokenId::TABLEDATA_ON:
+    case HtmlTokenId::TABLEDATA_OFF:
+    case HtmlTokenId::TABLEHEADER_ON:
+    case HtmlTokenId::TABLEHEADER_OFF:
+
+    // anchors and physical character formatting
+    case HtmlTokenId::ANCHOR_ON:
+    case HtmlTokenId::ANCHOR_OFF:
+    case HtmlTokenId::BOLD_ON:
+    case HtmlTokenId::BOLD_OFF:
+    case HtmlTokenId::ITALIC_ON:
+    case HtmlTokenId::ITALIC_OFF:
+    case HtmlTokenId::STRIKE_ON:
+    case HtmlTokenId::STRIKE_OFF:
+    case HtmlTokenId::STRIKETHROUGH_ON:
+    case HtmlTokenId::STRIKETHROUGH_OFF:
+    case HtmlTokenId::UNDERLINE_ON:
+    case HtmlTokenId::UNDERLINE_OFF:
+    case HtmlTokenId::BASEFONT_ON:
+    case HtmlTokenId::BASEFONT_OFF:
+    case HtmlTokenId::FONT_ON:
+    case HtmlTokenId::FONT_OFF:
+    case HtmlTokenId::BLINK_ON:
+    case HtmlTokenId::BLINK_OFF:
+    case HtmlTokenId::SPAN_ON:
+    case HtmlTokenId::SPAN_OFF:
+    case HtmlTokenId::SUBSCRIPT_ON:
+    case HtmlTokenId::SUBSCRIPT_OFF:
+    case HtmlTokenId::SUPERSCRIPT_ON:
+    case HtmlTokenId::SUPERSCRIPT_OFF:
+    case HtmlTokenId::BIGPRINT_ON:
+    case HtmlTokenId::BIGPRINT_OFF:
+    case HtmlTokenId::SMALLPRINT_OFF:
+    case HtmlTokenId::SMALLPRINT_ON:
+
+    // logical character formatting
+    case HtmlTokenId::EMPHASIS_ON:
+    case HtmlTokenId::EMPHASIS_OFF:
+    case HtmlTokenId::CITATION_ON:
+    case HtmlTokenId::CITATION_OFF:
+    case HtmlTokenId::STRONG_ON:
+    case HtmlTokenId::STRONG_OFF:
+    case HtmlTokenId::CODE_ON:
+    case HtmlTokenId::CODE_OFF:
+    case HtmlTokenId::SAMPLE_ON:
+    case HtmlTokenId::SAMPLE_OFF:
+    case HtmlTokenId::KEYBOARD_ON:
+    case HtmlTokenId::KEYBOARD_OFF:
+    case HtmlTokenId::VARIABLE_ON:
+    case HtmlTokenId::VARIABLE_OFF:
+    case HtmlTokenId::DEFINSTANCE_ON:
+    case HtmlTokenId::DEFINSTANCE_OFF:
+    case HtmlTokenId::SHORTQUOTE_ON:
+    case HtmlTokenId::SHORTQUOTE_OFF:
+    case HtmlTokenId::LANGUAGE_ON:
+    case HtmlTokenId::LANGUAGE_OFF:
+    case HtmlTokenId::AUTHOR_ON:
+    case HtmlTokenId::AUTHOR_OFF:
+    case HtmlTokenId::PERSON_ON:
+    case HtmlTokenId::PERSON_OFF:
+    case HtmlTokenId::ACRONYM_ON:
+    case HtmlTokenId::ACRONYM_OFF:
+    case HtmlTokenId::ABBREVIATION_ON:
+    case HtmlTokenId::ABBREVIATION_OFF:
+    case HtmlTokenId::INSERTEDTEXT_ON:
+    case HtmlTokenId::INSERTEDTEXT_OFF:
+    case HtmlTokenId::DELETEDTEXT_ON:
+    case HtmlTokenId::DELETEDTEXT_OFF:
+    case HtmlTokenId::TELETYPE_ON:
+    case HtmlTokenId::TELETYPE_OFF:
+        break;
+
+    // The remainder is treated as an unknown token.
+    default:
+        if( nToken != HtmlTokenId::NONE )
+        {
+            const bool bIsOff = (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken);
+            nToken = bIsOff ? HtmlTokenId::UNKNOWNCONTROL_OFF
+                            : HtmlTokenId::UNKNOWNCONTROL_ON;
+        }
+        break;
+    }
+
+    bPre_IgnoreNewPara = false;
+
+    return nToken;
+}
+
+/// Filters a token that was read inside an <XMP> section.
+///
+/// NEWPARA is swallowed while bPre_IgnoreNewPara is set; text, non-breaking
+/// space and soft hyphen tokens are kept. Any other token is converted back
+/// into text: aToken becomes "<name ...>" or "</name ...>" built from
+/// sSaveToken and the (unescaped) remainder of the tag, and TEXTTOKEN is
+/// returned. bPre_IgnoreNewPara is always cleared before returning.
+HtmlTokenId HTMLParser::FilterXMP( HtmlTokenId nToken )
+{
+    if( HtmlTokenId::NEWPARA == nToken )
+    {
+        if( bPre_IgnoreNewPara )
+            nToken = HtmlTokenId::NONE;
+    }
+    else if( HtmlTokenId::TEXTTOKEN != nToken &&
+             HtmlTokenId::NONBREAKSPACE != nToken &&
+             HtmlTokenId::SOFTHYPH != nToken &&
+             HtmlTokenId::NONE != nToken )
+    {
+        // rebuild the opening part of the tag in front of the saved name
+        const bool bIsOff = (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken);
+        if( bIsOff )
+            sSaveToken = "</" + sSaveToken;
+        else
+            sSaveToken = "<" + sSaveToken;
+
+        if( aToken.isEmpty() )
+        {
+            aToken = sSaveToken;
+        }
+        else
+        {
+            // the tag carried further text: put the name in front of it
+            UnescapeToken();
+            sSaveToken += " ";
+            aToken.insert(0, sSaveToken);
+        }
+        aToken.append( ">" );
+        nToken = HtmlTokenId::TEXTTOKEN;
+    }
+    // TEXTTOKEN, NONBREAKSPACE, SOFTHYPH and NONE pass through untouched
+
+    bPre_IgnoreNewPara = false;
+
+    return nToken;
+}
+
+/// Filters a token that was read inside a <LISTING> section.
+///
+/// NEWPARA is swallowed while bPre_IgnoreNewPara is set; text, non-breaking
+/// space and soft hyphen tokens are kept. Every other token except NONE is
+/// replaced by UNKNOWNCONTROL_OFF (for off tokens) or UNKNOWNCONTROL_ON.
+/// bPre_IgnoreNewPara is always cleared before returning.
+HtmlTokenId HTMLParser::FilterListing( HtmlTokenId nToken )
+{
+    if( HtmlTokenId::NEWPARA == nToken )
+    {
+        if( bPre_IgnoreNewPara )
+            nToken = HtmlTokenId::NONE;
+    }
+    else if( HtmlTokenId::TEXTTOKEN != nToken &&
+             HtmlTokenId::NONBREAKSPACE != nToken &&
+             HtmlTokenId::SOFTHYPH != nToken &&
+             HtmlTokenId::NONE != nToken )
+    {
+        const bool bIsOff = (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken);
+        nToken = bIsOff ? HtmlTokenId::UNKNOWNCONTROL_OFF
+                        : HtmlTokenId::UNKNOWNCONTROL_ON;
+    }
+    // TEXTTOKEN, NONBREAKSPACE, SOFTHYPH and NONE pass through untouched
+
+    bPre_IgnoreNewPara = false;
+
+    return nToken;
+}
+
+/// Rewrites the URL of a known internal icon into a private image URL.
+///
+/// If rURL starts with OOO_STRING_SVTOOLS_HTML_internal_icon and the rest of
+/// it is one of the known icon names (baddata, delayed, embed, insecure,
+/// notfound), OOO_STRING_SVTOOLS_HTML_private_image is put in front of rURL.
+///
+/// @param rURL  URL to inspect; modified only when true is returned.
+/// @return true if rURL named a known internal icon and was rewritten.
+bool HTMLParser::InternalImgToPrivateURL( OUString& rURL )
+{
+    // Let startsWith() hand back the part behind the prefix instead of
+    // cutting at a hard-coded offset that has to match the prefix length.
+    OUString aName;
+    if( !rURL.startsWith( OOO_STRING_SVTOOLS_HTML_internal_icon, &aName ) )
+        return false;
+
+    // A URL that consists of the prefix only names no icon; bail out before
+    // aName[0] would index into an empty string.
+    if( aName.isEmpty() )
+        return false;
+
+    bool bFound = false;
+    // The first character selects the only candidate worth comparing.
+    switch( aName[0] )
+    {
+    case 'b':
+        bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_baddata;
+        break;
+    case 'd':
+        bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_delayed;
+        break;
+    case 'e':
+        bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_embed;
+        break;
+    case 'i':
+        bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_insecure;
+        break;
+    case 'n':
+        bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_notfound;
+        break;
+    }
+
+    if( bFound )
+    {
+        // The complete original URL (prefix included) follows the scheme.
+        OUString sTmp ( rURL );
+        rURL = OOO_STRING_SVTOOLS_HTML_private_image;
+        rURL += sTmp;
+    }
+
+    return bFound;
+}
+
+namespace {
+
+enum class HtmlMeta { // action chosen from the NAME / HTTP-EQUIV value of a <META> tag, see aHTMLMetaNameTable
+ NONE = 0, // initial value in ParseMetaOptionsImpl; there a NAME-only tag is then stored as a user-defined property
+ Author, // ParseMetaOptionsImpl: setAuthor
+ Description, // ParseMetaOptionsImpl: setDescription; the only action whose content keeps (converted) line ends
+ Keywords, // ParseMetaOptionsImpl: setKeywords, content split at commas
+ Refresh, // ParseMetaOptionsImpl only raises a debug assertion for it
+ Classification, // ParseMetaOptionsImpl: setSubject
+ Created, // ParseMetaOptionsImpl: setCreationDate
+ ChangedBy, // ParseMetaOptionsImpl: setModifiedBy
+ Changed, // ParseMetaOptionsImpl: setModificationDate
+ Generator, // no dedicated branch in ParseMetaOptionsImpl
+ SDFootnote, // no dedicated branch in ParseMetaOptionsImpl
+ SDEndnote, // no dedicated branch in ParseMetaOptionsImpl
+ ContentType // ParseMetaOptionsImpl: derives the text encoding via GetEncodingByMIME
+};
+
+}
+
+// <META NAME=xxx>
+// Lookup table from the NAME / HTTP-EQUIV string of a META tag to the
+// HtmlMeta action; it is handed to HTMLOption::GetEnum by
+// ParseMetaOptionsImpl. The last entry has no name (presumably the end
+// marker GetEnum stops at).
+HTMLOptionEnum<HtmlMeta> const aHTMLMetaNameTable[] =
+{
+    { OOO_STRING_SVTOOLS_HTML_META_author,         HtmlMeta::Author         },
+    { OOO_STRING_SVTOOLS_HTML_META_changed,        HtmlMeta::Changed        },
+    { OOO_STRING_SVTOOLS_HTML_META_changedby,      HtmlMeta::ChangedBy      },
+    { OOO_STRING_SVTOOLS_HTML_META_classification, HtmlMeta::Classification },
+    { OOO_STRING_SVTOOLS_HTML_META_content_type,   HtmlMeta::ContentType    },
+    { OOO_STRING_SVTOOLS_HTML_META_created,        HtmlMeta::Created        },
+    { OOO_STRING_SVTOOLS_HTML_META_description,    HtmlMeta::Description    },
+    { OOO_STRING_SVTOOLS_HTML_META_keywords,       HtmlMeta::Keywords       },
+    { OOO_STRING_SVTOOLS_HTML_META_generator,      HtmlMeta::Generator      },
+    { OOO_STRING_SVTOOLS_HTML_META_refresh,        HtmlMeta::Refresh        },
+    { OOO_STRING_SVTOOLS_HTML_META_sdendnote,      HtmlMeta::SDEndnote      },
+    { OOO_STRING_SVTOOLS_HTML_META_sdfootnote,     HtmlMeta::SDFootnote     },
+    { nullptr,                                     HtmlMeta::NONE           }
+};
+
+
+void HTMLParser::AddMetaUserDefined( OUString const & ) // called by ParseMetaOptionsImpl with the name of a user-defined property it has just added
+{ // intentionally empty: the name is ignored here (presumably a hook for derived parsers; confirm in the class declaration)
+}
+
+/// Evaluates the options of one <META> tag.
+///
+/// @param i_xDocProps    document properties to update; may be empty, in
+///                       which case no property is written.
+/// @param i_pHTTPHeader  if non-null, HTTP-EQUIV name/content pairs are
+///                       appended to it.
+/// @param aOptions       options of the META tag.
+/// @param o_rEnc         receives the encoding named by a CHARSET option or
+///                       by a content-type entry; otherwise left unchanged.
+/// @return true if a document property was set or added.
+bool HTMLParser::ParseMetaOptionsImpl(
+        const uno::Reference<document::XDocumentProperties> & i_xDocProps,
+        SvKeyValueIterator *i_pHTTPHeader,
+        const HTMLOptions& aOptions,
+        rtl_TextEncoding& o_rEnc )
+{
+    OUString aName;
+    OUString aContent;
+    HtmlMeta eAction = HtmlMeta::NONE;
+    bool bHTTPEquiv = false;
+    bool bChanged = false;
+
+    // The options are visited from the last one to the first one.
+    size_t nRemaining = aOptions.size();
+    while ( nRemaining > 0 )
+    {
+        const HTMLOption& rOption = aOptions[--nRemaining];
+        switch ( rOption.GetToken() )
+        {
+            case HtmlOptionId::NAME:
+                aName = rOption.GetString();
+                // NAME only selects the action if nothing selected one yet
+                if ( eAction == HtmlMeta::NONE )
+                    rOption.GetEnum( eAction, aHTMLMetaNameTable );
+                break;
+
+            case HtmlOptionId::HTTPEQUIV:
+                aName = rOption.GetString();
+                rOption.GetEnum( eAction, aHTMLMetaNameTable );
+                bHTTPEquiv = true;
+                break;
+
+            case HtmlOptionId::CONTENT:
+                aContent = rOption.GetString();
+                break;
+
+            case HtmlOptionId::CHARSET:
+            {
+                const OString sCharset(
+                    OUStringToOString(rOption.GetString(), RTL_TEXTENCODING_ASCII_US));
+                const rtl_TextEncoding eMimeEnc
+                    = rtl_getTextEncodingFromMimeCharset(sCharset.getStr());
+                o_rEnc = GetExtendedCompatibilityTextEncoding(eMimeEnc);
+                break;
+            }
+
+            default:
+                break;
+        }
+    }
+
+    const bool bIsPlainDescription = !bHTTPEquiv && HtmlMeta::Description == eAction;
+    if ( bIsPlainDescription )
+    {
+        // convert line endings for Description
+        aContent = convertLineEnd(aContent, GetSystemLineEnd());
+    }
+    else
+    {
+        // if it is not a Description, remove CRs and LFs from CONTENT
+        aContent = aContent.replaceAll("\r", "").replaceAll("\n", "");
+    }
+
+    if ( bHTTPEquiv && i_pHTTPHeader )
+    {
+        // Netscape seems to just ignore a closing ", so we do too
+        if ( aContent.endsWith("\"") )
+            aContent = aContent.copy( 0, aContent.getLength() - 1 );
+
+        SvKeyValue aKeyValue( aName, aContent );
+        i_pHTTPHeader->Append( aKeyValue );
+    }
+
+    switch ( eAction )
+    {
+        case HtmlMeta::Author:
+            if ( !i_xDocProps.is() )
+                break;
+            i_xDocProps->setAuthor( aContent );
+            bChanged = true;
+            break;
+
+        case HtmlMeta::Description:
+            if ( !i_xDocProps.is() )
+                break;
+            i_xDocProps->setDescription( aContent );
+            bChanged = true;
+            break;
+
+        case HtmlMeta::Keywords:
+            if ( !i_xDocProps.is() )
+                break;
+            i_xDocProps->setKeywords(
+                ::comphelper::string::convertCommaSeparated(aContent));
+            bChanged = true;
+            break;
+
+        case HtmlMeta::Classification:
+            if ( !i_xDocProps.is() )
+                break;
+            i_xDocProps->setSubject( aContent );
+            bChanged = true;
+            break;
+
+        case HtmlMeta::ChangedBy:
+            if ( !i_xDocProps.is() )
+                break;
+            i_xDocProps->setModifiedBy( aContent );
+            bChanged = true;
+            break;
+
+        case HtmlMeta::Created:
+        case HtmlMeta::Changed:
+        {
+            if ( !i_xDocProps.is() || aContent.isEmpty() )
+                break;
+
+            ::util::DateTime aUnoDateTime;
+            bool bParsed = false;
+            if (comphelper::string::getTokenCount(aContent, ';') == 2)
+            {
+                // "<date>;<time>", both written as integers
+                sal_Int32 nIdx{ 0 };
+                Date aDate(o3tl::toInt32(o3tl::getToken(aContent, 0, ';', nIdx)));
+                auto nTime = o3tl::toInt64(o3tl::getToken(aContent, 0, ';', nIdx));
+                if (nTime < 0)
+                    nTime = o3tl::saturating_toggle_sign(nTime);
+                tools::Time aTime(nTime);
+                DateTime aDateTime(aDate, aTime);
+                aUnoDateTime = aDateTime.GetUNODateTime();
+                bParsed = true;
+            }
+            else if (utl::ISO8601parseDateTime(aContent, aUnoDateTime))
+            {
+                bParsed = true;
+            }
+
+            if ( !bParsed )
+                break;
+
+            bChanged = true;
+            if (HtmlMeta::Created == eAction)
+                i_xDocProps->setCreationDate(aUnoDateTime);
+            else
+                i_xDocProps->setModificationDate(aUnoDateTime);
+            break;
+        }
+
+        case HtmlMeta::Refresh:
+            DBG_ASSERT( !bHTTPEquiv || i_pHTTPHeader, "Lost Reload-URL because of omitted MUST change." );
+            break;
+
+        case HtmlMeta::ContentType:
+            if ( !aContent.isEmpty() )
+                o_rEnc = GetEncodingByMIME( aContent );
+            break;
+
+        case HtmlMeta::NONE:
+            // a NAME that is not in the table becomes a user-defined property
+            if ( !bHTTPEquiv && i_xDocProps.is() )
+            {
+                uno::Reference<beans::XPropertyContainer> xUDProps
+                    = i_xDocProps->getUserDefinedProperties();
+                try {
+                    xUDProps->addProperty(aName,
+                        beans::PropertyAttribute::REMOVABLE,
+                        uno::Any(aContent));
+                    AddMetaUserDefined(aName);
+                    bChanged = true;
+                } catch (uno::Exception &) {
+                    // ignore
+                }
+            }
+            break;
+
+        default:
+            break;
+    }
+
+    return bChanged;
+}
+
+/// Evaluates the options of the current <META> token and, where permitted,
+/// switches the source encoding to the one the tag announces.
+///
+/// @param i_xDocProps  document properties handed on to ParseMetaOptionsImpl.
+/// @param i_pHeader    HTTP header list handed on to ParseMetaOptionsImpl.
+/// @return the result of ParseMetaOptionsImpl.
+bool HTMLParser::ParseMetaOptions(
+        const uno::Reference<document::XDocumentProperties> & i_xDocProps,
+        SvKeyValueIterator *i_pHeader )
+{
+    // GetOptions must not strip line ends from the CONTENT option.
+    const HtmlOptionId eKeepLineEnds = HtmlOptionId::CONTENT;
+    rtl_TextEncoding eMetaEnc = RTL_TEXTENCODING_DONTKNOW;
+
+    const HTMLOptions& rOptions = GetOptions(&eKeepLineEnds);
+    const bool bChanged = ParseMetaOptionsImpl( i_xDocProps, i_pHeader, rOptions, eMetaEnc );
+
+    // If the encoding is set by a META tag, it may only overwrite the
+    // current encoding if both, the current and the new encoding, are
+    // single-byte encodings. Everything else cannot lead to reasonable
+    // results.
+    const bool bMayOverride = RTL_TEXTENCODING_DONTKNOW != eMetaEnc
+                              && rtl_isOctetTextEncoding( eMetaEnc )
+                              && rtl_isOctetTextEncoding( GetSrcEncoding() );
+    if ( bMayOverride )
+        SetSrcEncoding( GetExtendedCompatibilityTextEncoding( eMetaEnc ) );
+
+    return bChanged;
+}
+
+/// Extracts the text encoding from the "charset" parameter of a MIME
+/// content type string.
+///
+/// @param rMime  content type, parsed with INetContentTypes::parse.
+/// @return the encoding named by the charset parameter, or
+///         RTL_TEXTENCODING_DONTKNOW if rMime cannot be parsed or has no
+///         charset parameter.
+rtl_TextEncoding HTMLParser::GetEncodingByMIME( const OUString& rMime )
+{
+    OUString sType;
+    OUString sSubType;
+    INetContentTypeParameterList aParameters;
+    if (!INetContentTypes::parse(rMime, sType, sSubType, &aParameters))
+        return RTL_TEXTENCODING_DONTKNOW;
+
+    auto const aCharsetPos = aParameters.find("charset");
+    if (aCharsetPos == aParameters.end())
+        return RTL_TEXTENCODING_DONTKNOW;
+
+    const OString sCharset(
+        OUStringToOString(aCharsetPos->second.m_sValue, RTL_TEXTENCODING_ASCII_US));
+    const rtl_TextEncoding eMimeEnc = rtl_getTextEncodingFromMimeCharset( sCharset.getStr() );
+    return GetExtendedCompatibilityTextEncoding( eMimeEnc );
+}
+
+/// Looks up the text encoding announced by the content-type entries of an
+/// HTTP header list.
+///
+/// Every entry whose key equals OOO_STRING_SVTOOLS_HTML_META_content_type
+/// (ASCII case-insensitively) and whose value is not empty is evaluated with
+/// GetEncodingByMIME; the result of the last such entry is returned.
+///
+/// @param pHTTPHeader  header list to search; may be null.
+/// @return the encoding found, or RTL_TEXTENCODING_DONTKNOW if pHTTPHeader
+///         is null or holds no such entry.
+rtl_TextEncoding HTMLParser::GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader )
+{
+    if( !pHTTPHeader )
+        return RTL_TEXTENCODING_DONTKNOW;
+
+    rtl_TextEncoding eFound = RTL_TEXTENCODING_DONTKNOW;
+    SvKeyValue aEntry;
+    bool bHaveEntry = pHTTPHeader->GetFirst( aEntry );
+    while( bHaveEntry )
+    {
+        const bool bIsContentType
+            = aEntry.GetKey().equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type );
+        if( bIsContentType && !aEntry.GetValue().isEmpty() )
+            eFound = HTMLParser::GetEncodingByMIME( aEntry.GetValue() );
+
+        bHaveEntry = pHTTPHeader->GetNext( aEntry );
+    }
+    return eFound;
+}
+
+/// Sets the source encoding from the content-type entry of an HTTP header
+/// list.
+///
+/// @param pHTTPHeader  header list passed to GetEncodingByHttpHeader.
+/// @return true if an encoding was found and set, false if the source
+///         encoding was left as it is.
+bool HTMLParser::SetEncodingByHTTPHeader( SvKeyValueIterator *pHTTPHeader )
+{
+    const rtl_TextEncoding eHeaderEnc = HTMLParser::GetEncodingByHttpHeader( pHTTPHeader );
+    if( RTL_TEXTENCODING_DONTKNOW == eHeaderEnc )
+        return false;
+
+    SetSrcEncoding( eHeaderEnc );
+    return true;
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */