diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /svtools/source/svhtml | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0.
upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'svtools/source/svhtml')
-rw-r--r-- | svtools/source/svhtml/HtmlWriter.cxx | 152 | ||||
-rw-r--r-- | svtools/source/svhtml/htmlkywd.cxx | 818 | ||||
-rw-r--r-- | svtools/source/svhtml/htmlout.cxx | 1017 | ||||
-rw-r--r-- | svtools/source/svhtml/htmlsupp.cxx | 159 | ||||
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 2213 |
5 files changed, 4359 insertions, 0 deletions
diff --git a/svtools/source/svhtml/HtmlWriter.cxx b/svtools/source/svhtml/HtmlWriter.cxx
new file mode 100644
index 0000000000..cd73bad074
--- /dev/null
+++ b/svtools/source/svhtml/HtmlWriter.cxx
@@ -0,0 +1,152 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <svtools/HtmlWriter.hxx>
+#include <tools/stream.hxx>
+#include <sal/log.hxx>
+#include <svtools/htmlout.hxx>
+
+/**
+ * Creates a writer that emits markup to rStream.
+ *
+ * Pretty printing is switched on by default. A non-empty rNamespace is
+ * stored with a trailing ':' and prepended to every element name written
+ * by start() and end().
+ */
+HtmlWriter::HtmlWriter(SvStream& rStream, std::string_view rNamespace) :
+    mrStream(rStream),
+    mbPrettyPrint(true)
+{
+    if (!rNamespace.empty())
+    {
+        // Convert namespace alias to a prefix.
+        maNamespace = OString::Concat(rNamespace) + ":";
+    }
+}
+
+/// All elements are expected to have been closed before destruction.
+HtmlWriter::~HtmlWriter()
+{
+    assert(maElementStack.empty());
+}
+
+/// Enables or disables the line breaks and indentation written around tags.
+void HtmlWriter::prettyPrint(bool b)
+{
+    mbPrettyPrint = b;
+}
+
+/**
+ * Opens the element aElement.
+ *
+ * If the start tag of the enclosing element is still open it is finished
+ * with '>' first. The new start tag is deliberately left open so that
+ * attribute() calls can append to it.
+ */
+void HtmlWriter::start(const OString& aElement)
+{
+    if (mbOpeningTagOpen)
+    {
+        mrStream.WriteChar('>');
+        if (mbPrettyPrint)
+            mrStream.WriteChar('\n');
+    }
+    maElementStack.push_back(aElement);
+
+    if (mbPrettyPrint)
+    {
+        // One indentation step per enclosing element; the stack is never
+        // empty here because of the push_back above.
+        for(size_t i = 0; i < maElementStack.size() - 1; i++)
+        {
+            mrStream.WriteOString(" ");
+        }
+    }
+
+    mrStream.WriteChar('<');
+    mrStream.WriteOString(Concat2View(maNamespace + aElement));
+    mbOpeningTagOpen = true;
+}
+
+/// Writes an element without attributes or content: start() followed by end().
+void HtmlWriter::single(const OString &aContent)
+{
+    start(aContent);
+    end();
+}
+
+/// Closes every element that is still open, innermost first.
+void HtmlWriter::flushStack()
+{
+    while (!maElementStack.empty())
+    {
+        end();
+    }
+}
+
+/**
+ * Closes the innermost open element and reports whether it was aElement.
+ *
+ * The innermost element is closed even on a mismatch; the mismatch is only
+ * logged. When no element is open nothing is written and false is returned.
+ *
+ * @return true if the element that was closed is named aElement.
+ */
+bool HtmlWriter::end(const OString& aElement)
+{
+    // back() on an empty vector is undefined behaviour, so reject an
+    // unbalanced end() before touching the stack.
+    if (maElementStack.empty())
+    {
+        SAL_WARN("svtools", "HtmlWriter: end element '" << aElement << "' without an open element");
+        return false;
+    }
+
+    bool bExpected = maElementStack.back() == aElement;
+    SAL_WARN_IF(!bExpected, "svtools", "HtmlWriter: end element mismatch - '" << aElement << "' expected '" << maElementStack.back() << "'");
+    end();
+    return bExpected;
+}
+
+/**
+ * Closes the innermost open element.
+ *
+ * A start tag that is still open is turned into a self-closing tag ("/>");
+ * otherwise a separate end tag is written. When no element is open the call
+ * is logged and ignored.
+ */
+void HtmlWriter::end()
+{
+    // Without this guard size() - 1 below would wrap around and back() /
+    // pop_back() would be undefined behaviour.
+    if (maElementStack.empty())
+    {
+        SAL_WARN("svtools", "HtmlWriter: end() called without an open element");
+        return;
+    }
+
+    if (mbOpeningTagOpen)
+    {
+        mrStream.WriteOString("/>");
+        if (mbPrettyPrint)
+            mrStream.WriteOString("\n");
+        mbOpeningTagOpen = false;
+    }
+    else
+    {
+        if (mbPrettyPrint)
+        {
+            for(size_t i = 0; i < maElementStack.size() - 1; i++)
+            {
+                mrStream.WriteOString(" ");
+            }
+        }
+        mrStream.WriteOString("</");
+        mrStream.WriteOString(Concat2View(maNamespace + maElementStack.back()));
+        mrStream.WriteOString(">");
+        if (mbPrettyPrint)
+            mrStream.WriteOString("\n");
+    }
+    maElementStack.pop_back();
+}
+
+/// Byte-string overload: the value is decoded as UTF-8 and forwarded to the
+/// OUString overload.
+void HtmlWriter::attribute(std::string_view aAttribute, std::string_view aValue)
+{
+    attribute(aAttribute, OStringToOUString(aValue, RTL_TEXTENCODING_UTF8));
+}
+
+/// Integer overload: the value is written in its decimal string form.
+void HtmlWriter::attribute(std::string_view aAttribute, const sal_Int32 aValue)
+{
+    attribute(aAttribute, OString::number(aValue));
+}
+
+/**
+ * Appends ' name="value"' to the currently open start tag.
+ *
+ * The value is written through HTMLOutFuncs::Out_String. Nothing is written
+ * if no start tag is open, or if the name or the value is empty.
+ */
+void HtmlWriter::attribute(std::string_view aAttribute, const OUString& aValue)
+{
+    assert(mbOpeningTagOpen);
+    if (mbOpeningTagOpen && !aAttribute.empty() && !aValue.isEmpty())
+    {
+        mrStream.WriteChar(' ');
+        mrStream.WriteOString(aAttribute);
+        mrStream.WriteOString("=\"");
+        HTMLOutFuncs::Out_String(mrStream, aValue);
+        mrStream.WriteChar('"');
+    }
+}
+
+/// Appends a value-less attribute (' name') to the currently open start tag.
+void HtmlWriter::attribute(std::string_view aAttribute)
+{
+    assert(mbOpeningTagOpen);
+    if (mbOpeningTagOpen && !aAttribute.empty())
+    {
+        mrStream.WriteChar(' ');
+        mrStream.WriteOString(aAttribute);
+    }
+}
+
+/**
+ * Writes element content.
+ *
+ * A start tag that is still open is finished with '>' first. rChars is
+ * passed to the stream unchanged; this function does not escape it.
+ */
+void HtmlWriter::characters(std::string_view rChars)
+{
+    if (mbOpeningTagOpen)
+    {
+        mrStream.WriteOString(">");
+        mbOpeningTagOpen = false;
+    }
+    mrStream.WriteOString(rChars);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
new file mode 100644
index 0000000000..d1b0ea2ee0
--- /dev/null
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -0,0 +1,818 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +#include <algorithm> +#include <string_view> + +#include <o3tl/string_view.hxx> +#include <sal/types.h> +#include <rtl/ustring.hxx> +#include <svtools/htmltokn.h> +#include <svtools/htmlkywd.hxx> + +// If this is odd, then getOnToken() breaks. 
+static_assert(static_cast<sal_Int16>(HtmlTokenId::ABBREVIATION_ON) % 2 == 0);
+
+namespace {
+
+// One row of a keyword table: the keyword text and the value it maps to.
+template<typename T>
+struct TokenEntry
+{
+    std::u16string_view sToken;
+    T nToken;
+};
+
+}
+
+// Orders two table rows by keyword text; used to check that a table is sorted.
+template<typename T>
+static bool sortCompare(const TokenEntry<T> & lhs, const TokenEntry<T> & rhs)
+{
+    return lhs.sToken < rhs.sToken;
+}
+// Compares a table row against a bare key; comparator for std::lower_bound.
+template<typename T>
+static bool findCompare(const TokenEntry<T> & lhs, std::u16string_view rhs)
+{
+    return lhs.sToken < rhs;
+}
+// Binary search for key in dataTable, which has to be sorted by sToken.
+// Returns the value stored for an exact match, notFoundValue otherwise.
+template<typename T, size_t LEN>
+static T search(TokenEntry<T> const (&dataTable)[LEN], std::u16string_view key, T notFoundValue)
+{
+    auto findIt = std::lower_bound( std::begin(dataTable), std::end(dataTable),
+                                    key, findCompare<T> );
+    // lower_bound yields the first row that is not less than key, so an
+    // exact match still has to be confirmed.
+    if (findIt != std::end(dataTable) && key == findIt->sToken)
+        return findIt->nToken;
+    return notFoundValue;
+}
+
+using HTML_TokenEntry = TokenEntry<HtmlTokenId>;
+
+// this array is sorted by the name (even if it doesn't look like it from the constant names)
+HTML_TokenEntry const aHTMLTokenTab[] = {
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_comment), HtmlTokenId::COMMENT},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_doctype), HtmlTokenId::DOCTYPE},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_cdata), HtmlTokenId::CDATA},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_anchor), HtmlTokenId::ANCHOR_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_abbreviation), HtmlTokenId::ABBREVIATION_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_acronym), HtmlTokenId::ACRONYM_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_address), HtmlTokenId::ADDRESS_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_applet), HtmlTokenId::APPLET_ON}, // HotJava
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_area), HtmlTokenId::AREA}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_author), HtmlTokenId::AUTHOR_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_bold), HtmlTokenId::BOLD_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_banner), HtmlTokenId::BANNER_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_base), HtmlTokenId::BASE}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_basefont), HtmlTokenId::BASEFONT_ON}, // Netscape
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_bigprint), HtmlTokenId::BIGPRINT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_blink), HtmlTokenId::BLINK_ON}, // Netscape
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_blockquote), HtmlTokenId::BLOCKQUOTE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_body), HtmlTokenId::BODY_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_blockquote30), HtmlTokenId::BLOCKQUOTE30_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_linebreak), HtmlTokenId::LINEBREAK},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_caption), HtmlTokenId::CAPTION_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_center), HtmlTokenId::CENTER_ON}, // Netscape
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_citation), HtmlTokenId::CITATION_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_code), HtmlTokenId::CODE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_col), HtmlTokenId::COL_ON}, // HTML 3 Table Model Draft
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_colgroup), HtmlTokenId::COLGROUP_ON}, // HTML 3 Table Model Draft
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_comment2), HtmlTokenId::COMMENT2_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_credit), HtmlTokenId::CREDIT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_dd), HtmlTokenId::DD_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_deletedtext), HtmlTokenId::DELETEDTEXT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_definstance), HtmlTokenId::DEFINSTANCE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_dirlist), HtmlTokenId::DIRLIST_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_division), HtmlTokenId::DIVISION_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_deflist), HtmlTokenId::DEFLIST_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_dt), HtmlTokenId::DT_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_emphasis), HtmlTokenId::EMPHASIS_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_embed), HtmlTokenId::EMBED}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_figure), HtmlTokenId::FIGURE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_footnote), HtmlTokenId::FOOTNOTE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_font), HtmlTokenId::FONT_ON}, // Netscape
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_form), HtmlTokenId::FORM_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_frame), HtmlTokenId::FRAME_ON}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_frameset), HtmlTokenId::FRAMESET_ON}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head1), HtmlTokenId::HEAD1_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head2), HtmlTokenId::HEAD2_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head3), HtmlTokenId::HEAD3_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head4), HtmlTokenId::HEAD4_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head5), HtmlTokenId::HEAD5_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head6), HtmlTokenId::HEAD6_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_head), HtmlTokenId::HEAD_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_horzrule), HtmlTokenId::HORZRULE},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_html), HtmlTokenId::HTML_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_italic), HtmlTokenId::ITALIC_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_iframe), HtmlTokenId::IFRAME_ON}, // IE 3.0b2
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_image), HtmlTokenId::IMAGE},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_input), HtmlTokenId::INPUT},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_insertedtext), HtmlTokenId::INSERTEDTEXT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_keyboard), HtmlTokenId::KEYBOARD_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_language), HtmlTokenId::LANGUAGE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_listheader), HtmlTokenId::LISTHEADER_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_li), HtmlTokenId::LI_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_link), HtmlTokenId::LINK}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_listing), HtmlTokenId::LISTING_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_map), HtmlTokenId::MAP_ON}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_marquee), HtmlTokenId::MARQUEE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_menulist), HtmlTokenId::MENULIST_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_meta), HtmlTokenId::META}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_multicol), HtmlTokenId::MULTICOL_ON}, // Netscape 3.0b5
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_nobr), HtmlTokenId::NOBR_ON}, // Netscape
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noembed), HtmlTokenId::NOEMBED_ON}, // Netscape 2.0 ???
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noframe), HtmlTokenId::NOFRAMES_ON}, // Netscape 2.0 ???
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noframes), HtmlTokenId::NOFRAMES_ON}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_noscript), HtmlTokenId::NOSCRIPT_ON}, // Netscape 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_note), HtmlTokenId::NOTE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_object), HtmlTokenId::OBJECT_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_orderlist), HtmlTokenId::ORDERLIST_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_option), HtmlTokenId::OPTION},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_parabreak), HtmlTokenId::PARABREAK_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_param), HtmlTokenId::PARAM}, // HotJava
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_person), HtmlTokenId::PERSON_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_plaintext2), HtmlTokenId::PLAINTEXT2_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_preformtxt), HtmlTokenId::PREFORMTXT_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_shortquote), HtmlTokenId::SHORTQUOTE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_strikethrough), HtmlTokenId::STRIKETHROUGH_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_sample), HtmlTokenId::SAMPLE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_script), HtmlTokenId::SCRIPT_ON}, // HTML 3.2
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_sdfield), HtmlTokenId::SDFIELD_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_select), HtmlTokenId::SELECT_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_smallprint), HtmlTokenId::SMALLPRINT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_spacer), HtmlTokenId::SPACER}, // Netscape 3.0b5
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_span), HtmlTokenId::SPAN_ON}, // Style Sheets
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_strike), HtmlTokenId::STRIKE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_strong), HtmlTokenId::STRONG_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_style), HtmlTokenId::STYLE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_subscript), HtmlTokenId::SUBSCRIPT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_superscript), HtmlTokenId::SUPERSCRIPT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_plaintext), HtmlTokenId::PLAINTEXT_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_table), HtmlTokenId::TABLE_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tbody), HtmlTokenId::TBODY_ON}, // HTML 3 Table Model Draft
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tabledata), HtmlTokenId::TABLEDATA_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_textarea), HtmlTokenId::TEXTAREA_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tfoot), HtmlTokenId::TFOOT_ON}, // HTML 3 Table Model Draft
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tableheader), HtmlTokenId::TABLEHEADER_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_thead), HtmlTokenId::THEAD_ON}, // HTML 3 Table Model Draft
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_title), HtmlTokenId::TITLE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_tablerow), HtmlTokenId::TABLEROW_ON}, // HTML 3.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_teletype), HtmlTokenId::TELETYPE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_underline), HtmlTokenId::UNDERLINE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_unorderlist), HtmlTokenId::UNORDERLIST_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_variable), HtmlTokenId::VARIABLE_ON},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_xmp), HtmlTokenId::XMP_ON},
+};
+
+
+// Maps a tag name to its token id.
+// Any name that starts with the comment keyword yields COMMENT; every other
+// name is looked up in aHTMLTokenTab, with NONE returned for unknown names.
+HtmlTokenId GetHTMLToken( std::u16string_view rName )
+{
+    // The sortedness check is only made on the first call.
+    static bool bSortKeyWords = false;
+    if( !bSortKeyWords )
+    {
+        assert( std::is_sorted( std::begin(aHTMLTokenTab), std::end(aHTMLTokenTab), sortCompare<HtmlTokenId> ) );
+        bSortKeyWords = true;
+    }
+
+    // Prefix match, done before the exact-match table lookup.
+    if( o3tl::starts_with( rName, u"" OOO_STRING_SVTOOLS_HTML_comment ))
+        return HtmlTokenId::COMMENT;
+
+    return search( aHTMLTokenTab, rName, HtmlTokenId::NONE);
+}
+
+using HTML_CharEntry = TokenEntry<sal_Unicode>;
+
+// Flag: RTF token table has already been sorted
+static bool bSortCharKeyWords = false;
+
+static HTML_CharEntry aHTMLCharNameTab[] = {
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_lt), 60},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_gt), 62},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_amp), 38},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_apos), 39},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_quot), 34},
+
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Agrave), 192},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Aacute), 193},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Acirc), 194},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Atilde), 195},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Auml), 196},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Aring), 197},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_AElig), 198},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ccedil), 199},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Egrave), 200},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Eacute), 201},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ecirc), 202},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Euml), 203},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Igrave), 204},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Iacute), 205},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Icirc), 206},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Iuml), 207},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_ETH), 208},
+    {std::u16string_view(u""
OOO_STRING_SVTOOLS_HTML_C_Ntilde), 209}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ograve), 210}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Oacute), 211}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ocirc), 212}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Otilde), 213}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ouml), 214}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Oslash), 216}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ugrave), 217}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Uacute), 218}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Ucirc), 219}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Uuml), 220}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_Yacute), 221}, + + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_THORN), 222}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_C_szlig), 223}, + + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_agrave), 224}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_aacute), 225}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_acirc), 226}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_atilde), 227}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_auml), 228}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_aring), 229}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_aelig), 230}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ccedil), 231}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_egrave), 232}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_eacute), 233}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ecirc), 234}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_euml), 235}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_igrave), 236}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iacute), 237}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_icirc), 238}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iuml), 239}, + 
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_eth), 240}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ntilde), 241}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ograve), 242}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oacute), 243}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ocirc), 244}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_otilde), 245}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ouml), 246}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oslash), 248}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ugrave), 249}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uacute), 250}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ucirc), 251}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uuml), 252}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_yacute), 253}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_thorn), 254}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_yuml), 255}, + +// special characters + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_acute), 180}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_brvbar), 166}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cedil), 184}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cent), 162}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_copy), 169}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_curren), 164}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_deg), 176}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_divide), 247}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frac12), 189}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frac14), 188}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frac34), 190}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iexcl), 161}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iquest), 191}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_laquo), 171}, + {std::u16string_view(u"" 
OOO_STRING_SVTOOLS_HTML_S_macr), 175}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_micro), 181}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_middot), 183}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_not), 172}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ordf), 170}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ordm), 186}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_para), 182}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_plusmn), 177}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_pound), 163}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_raquo), 187}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_reg), 174}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sect), 167}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup1), 185}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup2), 178}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup3), 179}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_times), 215}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uml), 168}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_yen), 165}, + +// special characters), which will be converted to tokens !!! 
+ {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nbsp), 1}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_shy), 2}, + + +// HTML4 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_OElig), 338}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oelig), 339}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Scaron), 352}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_scaron), 353}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Yuml), 376}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_fnof), 402}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_circ), 710}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_tilde), 732}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Alpha), 913}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Beta), 914}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Gamma), 915}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Delta), 916}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Epsilon), 917}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Zeta), 918}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Eta), 919}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Theta), 920}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Iota), 921}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Kappa), 922}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Lambda), 923}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Mu), 924}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Nu), 925}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Xi), 926}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Omicron), 927}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Pi), 928}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Rho), 929}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Sigma), 931}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Tau), 932}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Upsilon), 
933}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Phi), 934}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Chi), 935}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Psi), 936}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Omega), 937}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_alpha), 945}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_beta), 946}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_gamma), 947}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_delta), 948}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_epsilon), 949}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_zeta), 950}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_eta), 951}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_theta), 952}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_iota), 953}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_kappa), 954}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lambda), 955}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_mu), 956}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nu), 957}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_xi), 958}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_omicron), 959}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_pi), 960}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rho), 961}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sigmaf), 962}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sigma), 963}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_tau), 964}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_upsilon), 965}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_phi), 966}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_chi), 967}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_psi), 968}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_omega), 969}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_thetasym), 977}, + 
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_upsih), 978}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_piv), 982}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ensp), 8194}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_emsp), 8195}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_thinsp), 8201}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_zwnj), 8204}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_zwj), 8205}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lrm), 8206}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rlm), 8207}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ndash), 8211}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_mdash), 8212}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lsquo), 8216}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rsquo), 8217}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sbquo), 8218}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ldquo), 8220}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rdquo), 8221}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_bdquo), 8222}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_dagger), 8224}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Dagger), 8225}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_bull), 8226}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_hellip), 8230}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_permil), 8240}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_prime), 8242}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_Prime), 8243}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lsaquo), 8249}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rsaquo), 8250}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oline), 8254}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_frasl), 8260}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_euro), 8364}, + {std::u16string_view(u"" 
OOO_STRING_SVTOOLS_HTML_S_image), 8465}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_weierp), 8472}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_real), 8476}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_trade), 8482}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_alefsym), 8501}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_larr), 8592}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uarr), 8593}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rarr), 8594}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_darr), 8595}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_harr), 8596}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_crarr), 8629}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lArr), 8656}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_uArr), 8657}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rArr), 8658}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_dArr), 8659}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_hArr), 8660}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_forall), 8704}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_part), 8706}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_exist), 8707}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_empty), 8709}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nabla), 8711}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_isin), 8712}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_notin), 8713}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ni), 8715}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_prod), 8719}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sum), 8721}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_minus), 8722}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lowast), 8727}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_radic), 8730}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_prop), 8733}, + 
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_infin), 8734}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ang), 8736}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_and), 8743}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_or), 8744}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cap), 8745}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cup), 8746}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_int), 8747}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_there4), 8756}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sim), 8764}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_cong), 8773}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_asymp), 8776}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ne), 8800}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_equiv), 8801}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_le), 8804}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_ge), 8805}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sub), 8834}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sup), 8835}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_nsub), 8836}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sube), 8838}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_supe), 8839}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_oplus), 8853}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_otimes), 8855}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_perp), 8869}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_sdot), 8901}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lceil), 8968}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rceil), 8969}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lfloor), 8970}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rfloor), 8971}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_lang), 9001}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_rang), 
9002},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_loz), 9674},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_spades), 9824},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_clubs), 9827},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_hearts), 9829},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_S_diams), 9830}
+};
+
+/**
+ * Map an HTML character name to the code point stored for it in
+ * aHTMLCharNameTab.
+ *
+ * The table is sorted with sortCompare the first time this function runs;
+ * bSortCharKeyWords records that the sort has been done, so later calls go
+ * straight to the lookup.
+ *
+ * @param rName character name to look up
+ * @return the result of search(); 0 is handed to search() as the fallback
+ *         value (presumably what an unknown name yields, confirm against
+ *         search())
+ */
+sal_Unicode GetHTMLCharName( std::u16string_view rName )
+{
+    if( !bSortCharKeyWords )
+    {
+        // one-time, in-place ordering of the table before the first lookup
+        auto const pTabBegin = std::begin(aHTMLCharNameTab);
+        auto const pTabEnd = std::end(aHTMLCharNameTab);
+        std::sort( pTabBegin, pTabEnd, sortCompare<sal_Unicode> );
+        bSortCharKeyWords = true;
+    }
+
+    return search<sal_Unicode>( aHTMLCharNameTab, rName, 0 );
+}
+
+// Flag: Options table has already been sorted
+static bool bSortOptionKeyWords = false;
+
+using HTML_OptionEntry = TokenEntry<HtmlOptionId>;
+
+static HTML_OptionEntry aHTMLOptionTab[] = {
+
+// Attributes without value
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_checked), HtmlOptionId::CHECKED},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_compact), HtmlOptionId::COMPACT},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_declare), HtmlOptionId::DECLARE},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_disabled), HtmlOptionId::DISABLED},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_ismap), HtmlOptionId::ISMAP},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_mayscript), HtmlOptionId::MAYSCRIPT},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_multiple), HtmlOptionId::MULTIPLE},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_nohref), HtmlOptionId::NOHREF}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_noresize), HtmlOptionId::NORESIZE}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_noshade), HtmlOptionId::NOSHADE}, // Netscape 2.0
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_nowrap), HtmlOptionId::NOWRAP},
+    {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdfixed), HtmlOptionId::SDFIXED},
+    {std::u16string_view(u"" 
OOO_STRING_SVTOOLS_HTML_O_selected), HtmlOptionId::SELECTED}, + +// Attributes with a string value + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_accept), HtmlOptionId::ACCEPT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_accesskey), HtmlOptionId::ACCESSKEY}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_alt), HtmlOptionId::ALT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_axis), HtmlOptionId::AXIS}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_char), HtmlOptionId::CHAR}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_charset), HtmlOptionId::CHARSET}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_class), HtmlOptionId::CLASS}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_code), HtmlOptionId::CODE}, // HotJava + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_codetype), HtmlOptionId::CODETYPE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_content), HtmlOptionId::CONTENT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_coords), HtmlOptionId::COORDS}, // Netscape 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_enctype), HtmlOptionId::ENCTYPE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_face), HtmlOptionId::FACE}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_frameborder), HtmlOptionId::FRAMEBORDER}, // IExplorer 3.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_httpequiv), HtmlOptionId::HTTPEQUIV}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_language), HtmlOptionId::LANGUAGE}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_name), HtmlOptionId::NAME}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_prompt), HtmlOptionId::PROMPT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_shape), HtmlOptionId::SHAPE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_standby), HtmlOptionId::STANDBY}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_style), HtmlOptionId::STYLE}, + 
{std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_title), HtmlOptionId::TITLE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_value), HtmlOptionId::VALUE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDval), HtmlOptionId::SDVAL}, // StarDiv NumberValue + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDnum), HtmlOptionId::SDNUM}, // StarDiv NumberFormat + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdlibrary), HtmlOptionId::SDLIBRARY}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdmodule), HtmlOptionId::SDMODULE}, + +// Attributes with a SGML identifier value + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_id), HtmlOptionId::ID}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_target), HtmlOptionId::TARGET}, // Netscape 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_to), HtmlOptionId::TO}, + +// Attributes with a URI value + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_action), HtmlOptionId::ACTION}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_archive), HtmlOptionId::ARCHIVE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_background), HtmlOptionId::BACKGROUND}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_classid), HtmlOptionId::CLASSID}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_codebase), HtmlOptionId::CODEBASE}, // HotJava + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_data), HtmlOptionId::DATA}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_href), HtmlOptionId::HREF}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_script), HtmlOptionId::SCRIPT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_src), HtmlOptionId::SRC}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_usemap), HtmlOptionId::USEMAP}, // Netscape 2.0 + +// Attributes with a color value (all Netscape versions) + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_alink), HtmlOptionId::ALINK}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bgcolor), 
HtmlOptionId::BGCOLOR}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bordercolor), HtmlOptionId::BORDERCOLOR}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bordercolorlight), HtmlOptionId::BORDERCOLORLIGHT}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_bordercolordark), HtmlOptionId::BORDERCOLORDARK}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_color), HtmlOptionId::COLOR}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_link), HtmlOptionId::LINK}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_text), HtmlOptionId::TEXT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_vlink), HtmlOptionId::VLINK}, + +// Attributes with a numerical value + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_border), HtmlOptionId::BORDER}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_cellspacing),HtmlOptionId::CELLSPACING}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_cellpadding),HtmlOptionId::CELLPADDING}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_charoff), HtmlOptionId::CHAROFF}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_colspan), HtmlOptionId::COLSPAN}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_framespacing), HtmlOptionId::FRAMESPACING}, // IExplorer 3.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_gutter), HtmlOptionId::GUTTER}, // Netscape 3.0b5 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_height), HtmlOptionId::HEIGHT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_hspace), HtmlOptionId::HSPACE}, // Netscape + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_left), HtmlOptionId::LEFT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_loop), HtmlOptionId::LOOP}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_marginheight),HtmlOptionId::MARGINHEIGHT}, // Netscape 2.0 + {std::u16string_view(u"" 
OOO_STRING_SVTOOLS_HTML_O_marginwidth),HtmlOptionId::MARGINWIDTH}, // Netscape 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_maxlength), HtmlOptionId::MAXLENGTH}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rowspan), HtmlOptionId::ROWSPAN}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_scrollamount), HtmlOptionId::SCROLLAMOUNT}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_scrolldelay), HtmlOptionId::SCROLLDELAY}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_span), HtmlOptionId::SPAN}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_tabindex), HtmlOptionId::TABINDEX}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_vspace), HtmlOptionId::VSPACE}, // Netscape + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_width), HtmlOptionId::WIDTH}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_zindex), HtmlOptionId::ZINDEX}, + +// Attributes with enum values + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_behavior), HtmlOptionId::BEHAVIOR}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_clear), HtmlOptionId::CLEAR}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_dir), HtmlOptionId::DIR}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_direction), HtmlOptionId::DIRECTION}, // IExplorer 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_format), HtmlOptionId::FORMAT}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_frame), HtmlOptionId::FRAME}, // HTML 3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_lang), HtmlOptionId::LANG}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_method), HtmlOptionId::METHOD}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rel), HtmlOptionId::REL}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rev), HtmlOptionId::REV}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rules), HtmlOptionId::RULES}, // HTML 
3 Table Model Draft + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_scrolling), HtmlOptionId::SCROLLING}, // Netscape 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_sdreadonly), HtmlOptionId::SDREADONLY}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_subtype), HtmlOptionId::SUBTYPE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_type), HtmlOptionId::TYPE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valign), HtmlOptionId::VALIGN}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_valuetype), HtmlOptionId::VALUETYPE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_wrap), HtmlOptionId::WRAP}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_XHTML_O_xml_space), HtmlOptionId::XML_SPACE}, + +// Attributes with script code value + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onblur), HtmlOptionId::ONBLUR}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onchange), HtmlOptionId::ONCHANGE}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onclick), HtmlOptionId::ONCLICK}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onfocus), HtmlOptionId::ONFOCUS}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onload), HtmlOptionId::ONLOAD}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onmouseover), HtmlOptionId::ONMOUSEOVER}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onreset), HtmlOptionId::ONRESET}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onselect), HtmlOptionId::ONSELECT}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onsubmit), HtmlOptionId::ONSUBMIT}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onunload), HtmlOptionId::ONUNLOAD}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onabort), HtmlOptionId::ONABORT}, // JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onerror), HtmlOptionId::ONERROR}, // 
JavaScript + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_onmouseout), HtmlOptionId::ONMOUSEOUT}, // JavaScript + + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonblur), HtmlOptionId::SDONBLUR}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonchange), HtmlOptionId::SDONCHANGE}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonclick), HtmlOptionId::SDONCLICK}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonfocus), HtmlOptionId::SDONFOCUS}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonload), HtmlOptionId::SDONLOAD}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonmouseover), HtmlOptionId::SDONMOUSEOVER}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonreset), HtmlOptionId::SDONRESET}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonselect), HtmlOptionId::SDONSELECT}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonsubmit), HtmlOptionId::SDONSUBMIT}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonunload), HtmlOptionId::SDONUNLOAD}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonabort), HtmlOptionId::SDONABORT}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonerror), HtmlOptionId::SDONERROR}, // StarBasic + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_SDonmouseout), HtmlOptionId::SDONMOUSEOUT}, // StarBasic + +// Attributes with context sensitive values + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_align), HtmlOptionId::ALIGN}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_cols), HtmlOptionId::COLS}, // Netscape 2.0 vs HTML 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_rows), HtmlOptionId::ROWS}, // Netscape 2.0 vs HTML 2.0 + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_size), HtmlOptionId::SIZE}, + {std::u16string_view(u"" OOO_STRING_SVTOOLS_HTML_O_start), 
HtmlOptionId::START}, // Netscape 2.0 vs IExplorer 2.0 +}; + +HtmlOptionId GetHTMLOption( std::u16string_view rName ) +{ + if( !bSortOptionKeyWords ) + { + std::sort( std::begin(aHTMLOptionTab), std::end(aHTMLOptionTab), sortCompare<HtmlOptionId> ); + bSortOptionKeyWords = true; + } + + return search( aHTMLOptionTab, rName, HtmlOptionId::UNKNOWN); +} + + +using HTML_ColorEntry = TokenEntry<sal_uInt32>; + +// Flag: color table has already been sorted +static bool bSortColorKeyWords = false; + +// Color names are not exported (source: +// "http://www.uio.no/~mnbjerke/colors_w.html") +// "http://www.infi.net/wwwimages/colorindex.html" seem to be buggy. +HTML_ColorEntry const aHTMLColorNameTab[] = { + { std::u16string_view(u"aliceblue"), 0x00f0f8ffUL }, + { std::u16string_view(u"antiquewhite"), 0x00faebd7UL }, + { std::u16string_view(u"aqua"), 0x0000ffffUL }, + { std::u16string_view(u"aquamarine"), 0x007fffd4UL }, + { std::u16string_view(u"azure"), 0x00f0ffffUL }, + { std::u16string_view(u"beige"), 0x00f5f5dcUL }, + { std::u16string_view(u"bisque"), 0x00ffe4c4UL }, + { std::u16string_view(u"black"), 0x00000000UL }, + { std::u16string_view(u"blanchedalmond"), 0x00ffebcdUL }, + { std::u16string_view(u"blue"), 0x000000ffUL }, + { std::u16string_view(u"blueviolet"), 0x008a2be2UL }, + { std::u16string_view(u"brown"), 0x00a52a2aUL }, + { std::u16string_view(u"burlywood"), 0x00deb887UL }, + { std::u16string_view(u"cadetblue"), 0x005f9ea0UL }, + { std::u16string_view(u"chartreuse"), 0x007fff00UL }, + { std::u16string_view(u"chocolate"), 0x00d2691eUL }, + { std::u16string_view(u"coral"), 0x00ff7f50UL }, + { std::u16string_view(u"cornflowerblue"), 0x006495edUL }, + { std::u16string_view(u"cornsilk"), 0x00fff8dcUL }, + { std::u16string_view(u"crimson"), 0x00dc143cUL }, + { std::u16string_view(u"cyan"), 0x0000ffffUL }, + { std::u16string_view(u"darkblue"), 0x0000008bUL }, + { std::u16string_view(u"darkcyan"), 0x00008b8bUL }, + { std::u16string_view(u"darkgoldenrod"), 0x00b8860bUL 
}, + { std::u16string_view(u"darkgray"), 0x00a9a9a9UL }, + { std::u16string_view(u"darkgreen"), 0x00006400UL }, + { std::u16string_view(u"darkkhaki"), 0x00bdb76bUL }, + { std::u16string_view(u"darkmagenta"), 0x008b008bUL }, + { std::u16string_view(u"darkolivegreen"), 0x00556b2fUL }, + { std::u16string_view(u"darkorange"), 0x00ff8c00UL }, + { std::u16string_view(u"darkorchid"), 0x009932ccUL }, + { std::u16string_view(u"darkred"), 0x008b0000UL }, + { std::u16string_view(u"darksalmon"), 0x00e9967aUL }, + { std::u16string_view(u"darkseagreen"), 0x008fbc8fUL }, + { std::u16string_view(u"darkslateblue"), 0x00483d8bUL }, + { std::u16string_view(u"darkslategray"), 0x002f4f4fUL }, + { std::u16string_view(u"darkturquoise"), 0x0000ced1UL }, + { std::u16string_view(u"darkviolet"), 0x009400d3UL }, + { std::u16string_view(u"deeppink"), 0x00ff1493UL }, + { std::u16string_view(u"deepskyblue"), 0x0000bfffUL }, + { std::u16string_view(u"dimgray"), 0x00696969UL }, + { std::u16string_view(u"dodgerblue"), 0x001e90ffUL }, + { std::u16string_view(u"firebrick"), 0x00b22222UL }, + { std::u16string_view(u"floralwhite"), 0x00fffaf0UL }, + { std::u16string_view(u"forestgreen"), 0x00228b22UL }, + { std::u16string_view(u"fuchsia"), 0x00ff00ffUL }, + { std::u16string_view(u"gainsboro"), 0x00dcdcdcUL }, + { std::u16string_view(u"ghostwhite"), 0x00f8f8ffUL }, + { std::u16string_view(u"gold"), 0x00ffd700UL }, + { std::u16string_view(u"goldenrod"), 0x00daa520UL }, + { std::u16string_view(u"gray"), 0x00808080UL }, + { std::u16string_view(u"green"), 0x00008000UL }, + { std::u16string_view(u"greenyellow"), 0x00adff2fUL }, + { std::u16string_view(u"honeydew"), 0x00f0fff0UL }, + { std::u16string_view(u"hotpink"), 0x00ff69b4UL }, + { std::u16string_view(u"indianred"), 0x00cd5c5cUL }, + { std::u16string_view(u"indigo"), 0x004b0082UL }, + { std::u16string_view(u"ivory"), 0x00fffff0UL }, + { std::u16string_view(u"khaki"), 0x00f0e68cUL }, + { std::u16string_view(u"lavender"), 0x00e6e6faUL }, + { 
std::u16string_view(u"lavenderblush"), 0x00fff0f5UL }, + { std::u16string_view(u"lawngreen"), 0x007cfc00UL }, + { std::u16string_view(u"lemonchiffon"), 0x00fffacdUL }, + { std::u16string_view(u"lightblue"), 0x00add8e6UL }, + { std::u16string_view(u"lightcoral"), 0x00f08080UL }, + { std::u16string_view(u"lightcyan"), 0x00e0ffffUL }, + { std::u16string_view(u"lightgoldenrodyellow"), 0x00fafad2UL }, + { std::u16string_view(u"lightgreen"), 0x0090ee90UL }, + { std::u16string_view(u"lightgrey"), 0x00d3d3d3UL }, + { std::u16string_view(u"lightpink"), 0x00ffb6c1UL }, + { std::u16string_view(u"lightsalmon"), 0x00ffa07aUL }, + { std::u16string_view(u"lightseagreen"), 0x0020b2aaUL }, + { std::u16string_view(u"lightskyblue"), 0x0087cefaUL }, + { std::u16string_view(u"lightslategray"), 0x00778899UL }, + { std::u16string_view(u"lightsteelblue"), 0x00b0c4deUL }, + { std::u16string_view(u"lightyellow"), 0x00ffffe0UL }, + { std::u16string_view(u"lime"), 0x0000ff00UL }, + { std::u16string_view(u"limegreen"), 0x0032cd32UL }, + { std::u16string_view(u"linen"), 0x00faf0e6UL }, + { std::u16string_view(u"magenta"), 0x00ff00ffUL }, + { std::u16string_view(u"maroon"), 0x00800000UL }, + { std::u16string_view(u"mediumaquamarine"), 0x0066cdaaUL }, + { std::u16string_view(u"mediumblue"), 0x000000cdUL }, + { std::u16string_view(u"mediumorchid"), 0x00ba55d3UL }, + { std::u16string_view(u"mediumpurple"), 0x009370dbUL }, + { std::u16string_view(u"mediumseagreen"), 0x003cb371UL }, + { std::u16string_view(u"mediumslateblue"), 0x007b68eeUL }, + { std::u16string_view(u"mediumspringgreen"), 0x0000fa9aUL }, + { std::u16string_view(u"mediumturquoise"), 0x0048d1ccUL }, + { std::u16string_view(u"mediumvioletred"), 0x00c71585UL }, + { std::u16string_view(u"midnightblue"), 0x00191970UL }, + { std::u16string_view(u"mintcream"), 0x00f5fffaUL }, + { std::u16string_view(u"mistyrose"), 0x00ffe4e1UL }, + { std::u16string_view(u"moccasin"), 0x00ffe4b5UL }, + { std::u16string_view(u"navajowhite"), 0x00ffdeadUL }, + 
{ std::u16string_view(u"navy"), 0x00000080UL }, + { std::u16string_view(u"oldlace"), 0x00fdf5e6UL }, + { std::u16string_view(u"olive"), 0x00808000UL }, + { std::u16string_view(u"olivedrab"), 0x006b8e23UL }, + { std::u16string_view(u"orange"), 0x00ffa500UL }, + { std::u16string_view(u"orangered"), 0x00ff4500UL }, + { std::u16string_view(u"orchid"), 0x00da70d6UL }, + { std::u16string_view(u"palegoldenrod"), 0x00eee8aaUL }, + { std::u16string_view(u"palegreen"), 0x0098fb98UL }, + { std::u16string_view(u"paleturquoise"), 0x00afeeeeUL }, + { std::u16string_view(u"palevioletred"), 0x00db7093UL }, + { std::u16string_view(u"papayawhip"), 0x00ffefd5UL }, + { std::u16string_view(u"peachpuff"), 0x00ffdab9UL }, + { std::u16string_view(u"peru"), 0x00cd853fUL }, + { std::u16string_view(u"pink"), 0x00ffc0cbUL }, + { std::u16string_view(u"plum"), 0x00dda0ddUL }, + { std::u16string_view(u"powderblue"), 0x00b0e0e6UL }, + { std::u16string_view(u"purple"), 0x00800080UL }, + { std::u16string_view(u"red"), 0x00ff0000UL }, + { std::u16string_view(u"rosybrown"), 0x00bc8f8fUL }, + { std::u16string_view(u"royalblue"), 0x004169e1UL }, + { std::u16string_view(u"saddlebrown"), 0x008b4513UL }, + { std::u16string_view(u"salmon"), 0x00fa8072UL }, + { std::u16string_view(u"sandybrown"), 0x00f4a460UL }, + { std::u16string_view(u"seagreen"), 0x002e8b57UL }, + { std::u16string_view(u"seashell"), 0x00fff5eeUL }, + { std::u16string_view(u"sienna"), 0x00a0522dUL }, + { std::u16string_view(u"silver"), 0x00c0c0c0UL }, + { std::u16string_view(u"skyblue"), 0x0087ceebUL }, + { std::u16string_view(u"slateblue"), 0x006a5acdUL }, + { std::u16string_view(u"slategray"), 0x00708090UL }, + { std::u16string_view(u"snow"), 0x00fffafaUL }, + { std::u16string_view(u"springgreen"), 0x0000ff7fUL }, + { std::u16string_view(u"steelblue"), 0x004682b4UL }, + { std::u16string_view(u"tan"), 0x00d2b48cUL }, + { std::u16string_view(u"teal"), 0x00008080UL }, + { std::u16string_view(u"thistle"), 0x00d8bfd8UL }, + { 
std::u16string_view(u"tomato"), 0x00ff6347UL }, + { std::u16string_view(u"turquoise"), 0x0040e0d0UL }, + { std::u16string_view(u"violet"), 0x00ee82eeUL }, + { std::u16string_view(u"wheat"), 0x00f5deb3UL }, + { std::u16string_view(u"white"), 0x00ffffffUL }, + { std::u16string_view(u"whitesmoke"), 0x00f5f5f5UL }, + { std::u16string_view(u"yellow"), 0x00ffff00UL }, + { std::u16string_view(u"yellowgreen"), 0x009acd32UL } +}; + +sal_uInt32 GetHTMLColor( const OUString& rName ) +{ + if( !bSortColorKeyWords ) + { + assert( std::is_sorted( std::begin(aHTMLColorNameTab), std::end(aHTMLColorNameTab), + sortCompare<sal_uInt32> ) ); + bSortColorKeyWords = true; + } + + OUString aLowerCase(rName.toAsciiLowerCase()); + + return search<sal_uInt32>( aHTMLColorNameTab, aLowerCase, SAL_MAX_UINT32); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/svtools/source/svhtml/htmlout.cxx b/svtools/source/svhtml/htmlout.cxx new file mode 100644 index 0000000000..4f027b0c10 --- /dev/null +++ b/svtools/source/svhtml/htmlout.cxx @@ -0,0 +1,1017 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +#include <svl/numformat.hxx> +#include <svl/zformat.hxx> +#include <svl/macitem.hxx> +#include <vcl/svapp.hxx> +#include <vcl/settings.hxx> + +#include <svtools/HtmlWriter.hxx> +#include <svtools/htmlout.hxx> +#include <svtools/htmlkywd.hxx> +#include <vcl/imap.hxx> +#include <vcl/imaprect.hxx> +#include <vcl/imapcirc.hxx> +#include <vcl/imappoly.hxx> +#include <svl/urihelper.hxx> +#include <rtl/character.hxx> +#include <tools/debug.hxx> + +#include <sstream> + +#define TXTCONV_BUFFER_SIZE 20 + +static sal_Size convertUnicodeToText(const sal_Unicode* pSrcBuf, sal_Size nSrcChars, char* pDestBuf, + sal_Size nDestBytes, sal_uInt32 nFlags, sal_uInt32* pInfo, + sal_Size* pSrcCvtChars) +{ + static rtl_UnicodeToTextConverter hConverter + = rtl_createUnicodeToTextConverter(RTL_TEXTENCODING_UTF8); + static rtl_UnicodeToTextContext hContext = hConverter + ? rtl_createUnicodeToTextContext(hConverter) + : reinterpret_cast<rtl_TextToUnicodeContext>(1); + + return rtl_convertUnicodeToText(hConverter, hContext, pSrcBuf, nSrcChars, pDestBuf, nDestBytes, + nFlags, pInfo, pSrcCvtChars); +} + +static const char *lcl_svhtml_GetEntityForChar( sal_uInt32 c, + rtl_TextEncoding eDestEnc ) +{ + const char* pStr = nullptr; + + // Note: We currently handle special cases for ISO-8859-2 here simply because + // the code was already submitted. But we should also handle other code pages + // as well as the code becomes available. + + if( eDestEnc == RTL_TEXTENCODING_ISO_8859_2 || eDestEnc == RTL_TEXTENCODING_MS_1250 ) + { + // Don't handle the following characters for Easter European (ISO-8859-2). 
+ switch ( c ) + { + case 164: // curren + case 184: // ccedil + case 193: // Aacute + case 194: // Acirc + case 196: // Auml + case 199: // Ccedil + case 201: // Eacute + case 203: // Euml + case 205: // Iacute + case 206: // Icirc + case 211: // Oacute + case 212: // Ocirc + case 214: // Ouml + case 215: // times + case 218: // Uacute + case 220: // Uuml + case 221: // Yacute + case 225: // aacute + case 226: // acirc + case 228: // auml + case 233: // eacute + case 235: // euml + case 237: // iacute + case 238: // icirc + case 243: // oacute + case 244: // ocirc + case 246: // ouml + case 247: // divide + case 250: // uacute + case 252: // uuml + case 253: // yacute + case 352: // Scaron + case 353: // scaron + return pStr; + } + } + + // TODO: handle more special cases for other code pages. + + switch( c ) + { +// case '\x0a': return HTMLOutFuncs::Out_Tag( rStream, OOO_STRING_SVTOOLS_HTML_linebreak ); + + case '<': pStr = OOO_STRING_SVTOOLS_HTML_C_lt; break; + case '>': pStr = OOO_STRING_SVTOOLS_HTML_C_gt; break; + case '&': pStr = OOO_STRING_SVTOOLS_HTML_C_amp; break; + case '"': pStr = OOO_STRING_SVTOOLS_HTML_C_quot; break; + + case 161: pStr = OOO_STRING_SVTOOLS_HTML_S_iexcl; break; + case 162: pStr = OOO_STRING_SVTOOLS_HTML_S_cent; break; + case 163: pStr = OOO_STRING_SVTOOLS_HTML_S_pound; break; + case 164: pStr = OOO_STRING_SVTOOLS_HTML_S_curren; break; + case 165: pStr = OOO_STRING_SVTOOLS_HTML_S_yen; break; + case 166: pStr = OOO_STRING_SVTOOLS_HTML_S_brvbar; break; + case 167: pStr = OOO_STRING_SVTOOLS_HTML_S_sect; break; + case 168: pStr = OOO_STRING_SVTOOLS_HTML_S_uml; break; + case 169: pStr = OOO_STRING_SVTOOLS_HTML_S_copy; break; + case 170: pStr = OOO_STRING_SVTOOLS_HTML_S_ordf; break; + case 171: pStr = OOO_STRING_SVTOOLS_HTML_S_laquo; break; + case 172: pStr = OOO_STRING_SVTOOLS_HTML_S_not; break; + case 174: pStr = OOO_STRING_SVTOOLS_HTML_S_reg; break; + case 175: pStr = OOO_STRING_SVTOOLS_HTML_S_macr; break; + case 176: pStr = 
OOO_STRING_SVTOOLS_HTML_S_deg; break; + case 177: pStr = OOO_STRING_SVTOOLS_HTML_S_plusmn; break; + case 178: pStr = OOO_STRING_SVTOOLS_HTML_S_sup2; break; + case 179: pStr = OOO_STRING_SVTOOLS_HTML_S_sup3; break; + case 180: pStr = OOO_STRING_SVTOOLS_HTML_S_acute; break; + case 181: pStr = OOO_STRING_SVTOOLS_HTML_S_micro; break; + case 182: pStr = OOO_STRING_SVTOOLS_HTML_S_para; break; + case 183: pStr = OOO_STRING_SVTOOLS_HTML_S_middot; break; + case 184: pStr = OOO_STRING_SVTOOLS_HTML_S_cedil; break; + case 185: pStr = OOO_STRING_SVTOOLS_HTML_S_sup1; break; + case 186: pStr = OOO_STRING_SVTOOLS_HTML_S_ordm; break; + case 187: pStr = OOO_STRING_SVTOOLS_HTML_S_raquo; break; + case 188: pStr = OOO_STRING_SVTOOLS_HTML_S_frac14; break; + case 189: pStr = OOO_STRING_SVTOOLS_HTML_S_frac12; break; + case 190: pStr = OOO_STRING_SVTOOLS_HTML_S_frac34; break; + case 191: pStr = OOO_STRING_SVTOOLS_HTML_S_iquest; break; + + case 192: pStr = OOO_STRING_SVTOOLS_HTML_C_Agrave; break; + case 193: pStr = OOO_STRING_SVTOOLS_HTML_C_Aacute; break; + case 194: pStr = OOO_STRING_SVTOOLS_HTML_C_Acirc; break; + case 195: pStr = OOO_STRING_SVTOOLS_HTML_C_Atilde; break; + case 196: pStr = OOO_STRING_SVTOOLS_HTML_C_Auml; break; + case 197: pStr = OOO_STRING_SVTOOLS_HTML_C_Aring; break; + case 198: pStr = OOO_STRING_SVTOOLS_HTML_C_AElig; break; + case 199: pStr = OOO_STRING_SVTOOLS_HTML_C_Ccedil; break; + case 200: pStr = OOO_STRING_SVTOOLS_HTML_C_Egrave; break; + case 201: pStr = OOO_STRING_SVTOOLS_HTML_C_Eacute; break; + case 202: pStr = OOO_STRING_SVTOOLS_HTML_C_Ecirc; break; + case 203: pStr = OOO_STRING_SVTOOLS_HTML_C_Euml; break; + case 204: pStr = OOO_STRING_SVTOOLS_HTML_C_Igrave; break; + case 205: pStr = OOO_STRING_SVTOOLS_HTML_C_Iacute; break; + case 206: pStr = OOO_STRING_SVTOOLS_HTML_C_Icirc; break; + case 207: pStr = OOO_STRING_SVTOOLS_HTML_C_Iuml; break; + case 208: pStr = OOO_STRING_SVTOOLS_HTML_C_ETH; break; + case 209: pStr = OOO_STRING_SVTOOLS_HTML_C_Ntilde; break; + case 
210: pStr = OOO_STRING_SVTOOLS_HTML_C_Ograve; break; + case 211: pStr = OOO_STRING_SVTOOLS_HTML_C_Oacute; break; + case 212: pStr = OOO_STRING_SVTOOLS_HTML_C_Ocirc; break; + case 213: pStr = OOO_STRING_SVTOOLS_HTML_C_Otilde; break; + case 214: pStr = OOO_STRING_SVTOOLS_HTML_C_Ouml; break; + case 215: pStr = OOO_STRING_SVTOOLS_HTML_S_times; break; + case 216: pStr = OOO_STRING_SVTOOLS_HTML_C_Oslash; break; + case 217: pStr = OOO_STRING_SVTOOLS_HTML_C_Ugrave; break; + case 218: pStr = OOO_STRING_SVTOOLS_HTML_C_Uacute; break; + case 219: pStr = OOO_STRING_SVTOOLS_HTML_C_Ucirc; break; + case 220: pStr = OOO_STRING_SVTOOLS_HTML_C_Uuml; break; + case 221: pStr = OOO_STRING_SVTOOLS_HTML_C_Yacute; break; + + case 222: pStr = OOO_STRING_SVTOOLS_HTML_C_THORN; break; + case 223: pStr = OOO_STRING_SVTOOLS_HTML_C_szlig; break; + + case 224: pStr = OOO_STRING_SVTOOLS_HTML_S_agrave; break; + case 225: pStr = OOO_STRING_SVTOOLS_HTML_S_aacute; break; + case 226: pStr = OOO_STRING_SVTOOLS_HTML_S_acirc; break; + case 227: pStr = OOO_STRING_SVTOOLS_HTML_S_atilde; break; + case 228: pStr = OOO_STRING_SVTOOLS_HTML_S_auml; break; + case 229: pStr = OOO_STRING_SVTOOLS_HTML_S_aring; break; + case 230: pStr = OOO_STRING_SVTOOLS_HTML_S_aelig; break; + case 231: pStr = OOO_STRING_SVTOOLS_HTML_S_ccedil; break; + case 232: pStr = OOO_STRING_SVTOOLS_HTML_S_egrave; break; + case 233: pStr = OOO_STRING_SVTOOLS_HTML_S_eacute; break; + case 234: pStr = OOO_STRING_SVTOOLS_HTML_S_ecirc; break; + case 235: pStr = OOO_STRING_SVTOOLS_HTML_S_euml; break; + case 236: pStr = OOO_STRING_SVTOOLS_HTML_S_igrave; break; + case 237: pStr = OOO_STRING_SVTOOLS_HTML_S_iacute; break; + case 238: pStr = OOO_STRING_SVTOOLS_HTML_S_icirc; break; + case 239: pStr = OOO_STRING_SVTOOLS_HTML_S_iuml; break; + case 240: pStr = OOO_STRING_SVTOOLS_HTML_S_eth; break; + case 241: pStr = OOO_STRING_SVTOOLS_HTML_S_ntilde; break; + case 242: pStr = OOO_STRING_SVTOOLS_HTML_S_ograve; break; + case 243: pStr = 
OOO_STRING_SVTOOLS_HTML_S_oacute; break; + case 244: pStr = OOO_STRING_SVTOOLS_HTML_S_ocirc; break; + case 245: pStr = OOO_STRING_SVTOOLS_HTML_S_otilde; break; + case 246: pStr = OOO_STRING_SVTOOLS_HTML_S_ouml; break; + case 247: pStr = OOO_STRING_SVTOOLS_HTML_S_divide; break; + case 248: pStr = OOO_STRING_SVTOOLS_HTML_S_oslash; break; + case 249: pStr = OOO_STRING_SVTOOLS_HTML_S_ugrave; break; + case 250: pStr = OOO_STRING_SVTOOLS_HTML_S_uacute; break; + case 251: pStr = OOO_STRING_SVTOOLS_HTML_S_ucirc; break; + case 252: pStr = OOO_STRING_SVTOOLS_HTML_S_uuml; break; + case 253: pStr = OOO_STRING_SVTOOLS_HTML_S_yacute; break; + case 254: pStr = OOO_STRING_SVTOOLS_HTML_S_thorn; break; + case 255: pStr = OOO_STRING_SVTOOLS_HTML_S_yuml; break; + + case 338: pStr = OOO_STRING_SVTOOLS_HTML_S_OElig; break; + case 339: pStr = OOO_STRING_SVTOOLS_HTML_S_oelig; break; + case 352: pStr = OOO_STRING_SVTOOLS_HTML_S_Scaron; break; + case 353: pStr = OOO_STRING_SVTOOLS_HTML_S_scaron; break; + case 376: pStr = OOO_STRING_SVTOOLS_HTML_S_Yuml; break; + case 402: pStr = OOO_STRING_SVTOOLS_HTML_S_fnof; break; + case 710: pStr = OOO_STRING_SVTOOLS_HTML_S_circ; break; + case 732: pStr = OOO_STRING_SVTOOLS_HTML_S_tilde; break; + + // Greek chars are handled later, + // since they should *not* be transformed to entities + // when generating Greek text (== using Greek encoding) + + case 8194: pStr = OOO_STRING_SVTOOLS_HTML_S_ensp; break; + case 8195: pStr = OOO_STRING_SVTOOLS_HTML_S_emsp; break; + case 8201: pStr = OOO_STRING_SVTOOLS_HTML_S_thinsp; break; + case 8204: pStr = OOO_STRING_SVTOOLS_HTML_S_zwnj; break; + case 8205: pStr = OOO_STRING_SVTOOLS_HTML_S_zwj; break; + case 8206: pStr = OOO_STRING_SVTOOLS_HTML_S_lrm; break; + case 8207: pStr = OOO_STRING_SVTOOLS_HTML_S_rlm; break; + case 8211: pStr = OOO_STRING_SVTOOLS_HTML_S_ndash; break; + case 8212: pStr = OOO_STRING_SVTOOLS_HTML_S_mdash; break; + case 8216: pStr = OOO_STRING_SVTOOLS_HTML_S_lsquo; break; + case 8217: pStr = 
OOO_STRING_SVTOOLS_HTML_S_rsquo; break; + case 8218: pStr = OOO_STRING_SVTOOLS_HTML_S_sbquo; break; + case 8220: pStr = OOO_STRING_SVTOOLS_HTML_S_ldquo; break; + case 8221: pStr = OOO_STRING_SVTOOLS_HTML_S_rdquo; break; + case 8222: pStr = OOO_STRING_SVTOOLS_HTML_S_bdquo; break; + case 8224: pStr = OOO_STRING_SVTOOLS_HTML_S_dagger; break; + case 8225: pStr = OOO_STRING_SVTOOLS_HTML_S_Dagger; break; + case 8226: pStr = OOO_STRING_SVTOOLS_HTML_S_bull; break; + case 8230: pStr = OOO_STRING_SVTOOLS_HTML_S_hellip; break; + case 8240: pStr = OOO_STRING_SVTOOLS_HTML_S_permil; break; + case 8242: pStr = OOO_STRING_SVTOOLS_HTML_S_prime; break; + case 8243: pStr = OOO_STRING_SVTOOLS_HTML_S_Prime; break; + case 8249: pStr = OOO_STRING_SVTOOLS_HTML_S_lsaquo; break; + case 8250: pStr = OOO_STRING_SVTOOLS_HTML_S_rsaquo; break; + case 8254: pStr = OOO_STRING_SVTOOLS_HTML_S_oline; break; + case 8260: pStr = OOO_STRING_SVTOOLS_HTML_S_frasl; break; + case 8364: pStr = OOO_STRING_SVTOOLS_HTML_S_euro; break; + case 8465: pStr = OOO_STRING_SVTOOLS_HTML_S_image; break; + case 8472: pStr = OOO_STRING_SVTOOLS_HTML_S_weierp; break; + case 8476: pStr = OOO_STRING_SVTOOLS_HTML_S_real; break; + case 8482: pStr = OOO_STRING_SVTOOLS_HTML_S_trade; break; + case 8501: pStr = OOO_STRING_SVTOOLS_HTML_S_alefsym; break; + case 8592: pStr = OOO_STRING_SVTOOLS_HTML_S_larr; break; + case 8593: pStr = OOO_STRING_SVTOOLS_HTML_S_uarr; break; + case 8594: pStr = OOO_STRING_SVTOOLS_HTML_S_rarr; break; + case 8595: pStr = OOO_STRING_SVTOOLS_HTML_S_darr; break; + case 8596: pStr = OOO_STRING_SVTOOLS_HTML_S_harr; break; + case 8629: pStr = OOO_STRING_SVTOOLS_HTML_S_crarr; break; + case 8656: pStr = OOO_STRING_SVTOOLS_HTML_S_lArr; break; + case 8657: pStr = OOO_STRING_SVTOOLS_HTML_S_uArr; break; + case 8658: pStr = OOO_STRING_SVTOOLS_HTML_S_rArr; break; + case 8659: pStr = OOO_STRING_SVTOOLS_HTML_S_dArr; break; + case 8660: pStr = OOO_STRING_SVTOOLS_HTML_S_hArr; break; + case 8704: pStr = 
OOO_STRING_SVTOOLS_HTML_S_forall; break; + case 8706: pStr = OOO_STRING_SVTOOLS_HTML_S_part; break; + case 8707: pStr = OOO_STRING_SVTOOLS_HTML_S_exist; break; + case 8709: pStr = OOO_STRING_SVTOOLS_HTML_S_empty; break; + case 8711: pStr = OOO_STRING_SVTOOLS_HTML_S_nabla; break; + case 8712: pStr = OOO_STRING_SVTOOLS_HTML_S_isin; break; + case 8713: pStr = OOO_STRING_SVTOOLS_HTML_S_notin; break; + case 8715: pStr = OOO_STRING_SVTOOLS_HTML_S_ni; break; + case 8719: pStr = OOO_STRING_SVTOOLS_HTML_S_prod; break; + case 8721: pStr = OOO_STRING_SVTOOLS_HTML_S_sum; break; + case 8722: pStr = OOO_STRING_SVTOOLS_HTML_S_minus; break; + case 8727: pStr = OOO_STRING_SVTOOLS_HTML_S_lowast; break; + case 8730: pStr = OOO_STRING_SVTOOLS_HTML_S_radic; break; + case 8733: pStr = OOO_STRING_SVTOOLS_HTML_S_prop; break; + case 8734: pStr = OOO_STRING_SVTOOLS_HTML_S_infin; break; + case 8736: pStr = OOO_STRING_SVTOOLS_HTML_S_ang; break; + case 8743: pStr = OOO_STRING_SVTOOLS_HTML_S_and; break; + case 8744: pStr = OOO_STRING_SVTOOLS_HTML_S_or; break; + case 8745: pStr = OOO_STRING_SVTOOLS_HTML_S_cap; break; + case 8746: pStr = OOO_STRING_SVTOOLS_HTML_S_cup; break; + case 8747: pStr = OOO_STRING_SVTOOLS_HTML_S_int; break; + case 8756: pStr = OOO_STRING_SVTOOLS_HTML_S_there4; break; + case 8764: pStr = OOO_STRING_SVTOOLS_HTML_S_sim; break; + case 8773: pStr = OOO_STRING_SVTOOLS_HTML_S_cong; break; + case 8776: pStr = OOO_STRING_SVTOOLS_HTML_S_asymp; break; + case 8800: pStr = OOO_STRING_SVTOOLS_HTML_S_ne; break; + case 8801: pStr = OOO_STRING_SVTOOLS_HTML_S_equiv; break; + case 8804: pStr = OOO_STRING_SVTOOLS_HTML_S_le; break; + case 8805: pStr = OOO_STRING_SVTOOLS_HTML_S_ge; break; + case 8834: pStr = OOO_STRING_SVTOOLS_HTML_S_sub; break; + case 8835: pStr = OOO_STRING_SVTOOLS_HTML_S_sup; break; + case 8836: pStr = OOO_STRING_SVTOOLS_HTML_S_nsub; break; + case 8838: pStr = OOO_STRING_SVTOOLS_HTML_S_sube; break; + case 8839: pStr = OOO_STRING_SVTOOLS_HTML_S_supe; break; + case 8853: pStr 
= OOO_STRING_SVTOOLS_HTML_S_oplus; break; + case 8855: pStr = OOO_STRING_SVTOOLS_HTML_S_otimes; break; + case 8869: pStr = OOO_STRING_SVTOOLS_HTML_S_perp; break; + case 8901: pStr = OOO_STRING_SVTOOLS_HTML_S_sdot; break; + case 8968: pStr = OOO_STRING_SVTOOLS_HTML_S_lceil; break; + case 8969: pStr = OOO_STRING_SVTOOLS_HTML_S_rceil; break; + case 8970: pStr = OOO_STRING_SVTOOLS_HTML_S_lfloor; break; + case 8971: pStr = OOO_STRING_SVTOOLS_HTML_S_rfloor; break; + case 9001: pStr = OOO_STRING_SVTOOLS_HTML_S_lang; break; + case 9002: pStr = OOO_STRING_SVTOOLS_HTML_S_rang; break; + case 9674: pStr = OOO_STRING_SVTOOLS_HTML_S_loz; break; + case 9824: pStr = OOO_STRING_SVTOOLS_HTML_S_spades; break; + case 9827: pStr = OOO_STRING_SVTOOLS_HTML_S_clubs; break; + case 9829: pStr = OOO_STRING_SVTOOLS_HTML_S_hearts; break; + case 9830: pStr = OOO_STRING_SVTOOLS_HTML_S_diams; break; + } + + // Greek chars: if we do not produce a Greek encoding, + // transform them into entities + if( !pStr && + ( eDestEnc != RTL_TEXTENCODING_ISO_8859_7 ) && + ( eDestEnc != RTL_TEXTENCODING_MS_1253 ) ) + { + switch( c ) + { + case 913: pStr = OOO_STRING_SVTOOLS_HTML_S_Alpha; break; + case 914: pStr = OOO_STRING_SVTOOLS_HTML_S_Beta; break; + case 915: pStr = OOO_STRING_SVTOOLS_HTML_S_Gamma; break; + case 916: pStr = OOO_STRING_SVTOOLS_HTML_S_Delta; break; + case 917: pStr = OOO_STRING_SVTOOLS_HTML_S_Epsilon; break; + case 918: pStr = OOO_STRING_SVTOOLS_HTML_S_Zeta; break; + case 919: pStr = OOO_STRING_SVTOOLS_HTML_S_Eta; break; + case 920: pStr = OOO_STRING_SVTOOLS_HTML_S_Theta; break; + case 921: pStr = OOO_STRING_SVTOOLS_HTML_S_Iota; break; + case 922: pStr = OOO_STRING_SVTOOLS_HTML_S_Kappa; break; + case 923: pStr = OOO_STRING_SVTOOLS_HTML_S_Lambda; break; + case 924: pStr = OOO_STRING_SVTOOLS_HTML_S_Mu; break; + case 925: pStr = OOO_STRING_SVTOOLS_HTML_S_Nu; break; + case 926: pStr = OOO_STRING_SVTOOLS_HTML_S_Xi; break; + case 927: pStr = OOO_STRING_SVTOOLS_HTML_S_Omicron; break; + case 928: 
pStr = OOO_STRING_SVTOOLS_HTML_S_Pi; break; + case 929: pStr = OOO_STRING_SVTOOLS_HTML_S_Rho; break; + case 931: pStr = OOO_STRING_SVTOOLS_HTML_S_Sigma; break; + case 932: pStr = OOO_STRING_SVTOOLS_HTML_S_Tau; break; + case 933: pStr = OOO_STRING_SVTOOLS_HTML_S_Upsilon; break; + case 934: pStr = OOO_STRING_SVTOOLS_HTML_S_Phi; break; + case 935: pStr = OOO_STRING_SVTOOLS_HTML_S_Chi; break; + case 936: pStr = OOO_STRING_SVTOOLS_HTML_S_Psi; break; + case 937: pStr = OOO_STRING_SVTOOLS_HTML_S_Omega; break; + case 945: pStr = OOO_STRING_SVTOOLS_HTML_S_alpha; break; + case 946: pStr = OOO_STRING_SVTOOLS_HTML_S_beta; break; + case 947: pStr = OOO_STRING_SVTOOLS_HTML_S_gamma; break; + case 948: pStr = OOO_STRING_SVTOOLS_HTML_S_delta; break; + case 949: pStr = OOO_STRING_SVTOOLS_HTML_S_epsilon; break; + case 950: pStr = OOO_STRING_SVTOOLS_HTML_S_zeta; break; + case 951: pStr = OOO_STRING_SVTOOLS_HTML_S_eta; break; + case 952: pStr = OOO_STRING_SVTOOLS_HTML_S_theta; break; + case 953: pStr = OOO_STRING_SVTOOLS_HTML_S_iota; break; + case 954: pStr = OOO_STRING_SVTOOLS_HTML_S_kappa; break; + case 955: pStr = OOO_STRING_SVTOOLS_HTML_S_lambda; break; + case 956: pStr = OOO_STRING_SVTOOLS_HTML_S_mu; break; + case 957: pStr = OOO_STRING_SVTOOLS_HTML_S_nu; break; + case 958: pStr = OOO_STRING_SVTOOLS_HTML_S_xi; break; + case 959: pStr = OOO_STRING_SVTOOLS_HTML_S_omicron; break; + case 960: pStr = OOO_STRING_SVTOOLS_HTML_S_pi; break; + case 961: pStr = OOO_STRING_SVTOOLS_HTML_S_rho; break; + case 962: pStr = OOO_STRING_SVTOOLS_HTML_S_sigmaf; break; + case 963: pStr = OOO_STRING_SVTOOLS_HTML_S_sigma; break; + case 964: pStr = OOO_STRING_SVTOOLS_HTML_S_tau; break; + case 965: pStr = OOO_STRING_SVTOOLS_HTML_S_upsilon; break; + case 966: pStr = OOO_STRING_SVTOOLS_HTML_S_phi; break; + case 967: pStr = OOO_STRING_SVTOOLS_HTML_S_chi; break; + case 968: pStr = OOO_STRING_SVTOOLS_HTML_S_psi; break; + case 969: pStr = OOO_STRING_SVTOOLS_HTML_S_omega; break; + case 977: pStr = 
OOO_STRING_SVTOOLS_HTML_S_thetasym;break; + case 978: pStr = OOO_STRING_SVTOOLS_HTML_S_upsih; break; + case 982: pStr = OOO_STRING_SVTOOLS_HTML_S_piv; break; + } + } + + return pStr; +} + +static sal_Size lcl_FlushContext(char* pBuffer, sal_uInt32 nFlags) +{ + sal_uInt32 nInfo = 0; + sal_Size nSrcChars; + sal_Size nLen = convertUnicodeToText(nullptr, 0, + pBuffer, TXTCONV_BUFFER_SIZE, nFlags|RTL_UNICODETOTEXT_FLAGS_FLUSH, + &nInfo, &nSrcChars); + DBG_ASSERT((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR|RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) == 0, "HTMLOut: error while flushing"); + return nLen; +} + +static OString lcl_ConvertCharToHTML( sal_uInt32 c, + OUString *pNonConvertableChars ) +{ + assert(rtl::isUnicodeCodePoint(c)); + + OStringBuffer aDest; + const char *pStr = nullptr; + switch( c ) + { + case 0xA0: // is a hard blank + pStr = OOO_STRING_SVTOOLS_HTML_S_nbsp; + break; + case 0x2011: // is a hard hyphen + pStr = "#8209"; + break; + case 0xAD: // is a soft hyphen + pStr = OOO_STRING_SVTOOLS_HTML_S_shy; + break; + default: + // There may be an entity for the character. + // The new HTML4 entities above 255 are not used for UTF-8, + // because Netscape 4 does support UTF-8 but does not support + // these entities. 
+ if( c < 128 ) + pStr = lcl_svhtml_GetEntityForChar( c, RTL_TEXTENCODING_UTF8 ); + break; + } + + char cBuffer[TXTCONV_BUFFER_SIZE]; + const sal_uInt32 nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE| + RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE| + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR| + RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR; + if( pStr ) + { + sal_Size nLen = lcl_FlushContext(cBuffer, nFlags); + char *pBuffer = cBuffer; + while( nLen-- ) + aDest.append(*pBuffer++); + aDest.append(OString::Concat("&") + pStr + ";"); + } + else + { + sal_uInt32 nInfo = 0; + sal_Size nSrcChars; + + sal_Unicode utf16[2]; + auto n = rtl::splitSurrogates(c, utf16); + sal_Size nLen = convertUnicodeToText(utf16, n, + cBuffer, TXTCONV_BUFFER_SIZE, + nFlags, &nInfo, &nSrcChars); + if( nLen > 0 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR|RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) == 0 ) + { + char *pBuffer = cBuffer; + while( nLen-- ) + aDest.append(*pBuffer++); + } + else + { + // If the character could not be converted to the destination + // character set, the UNICODE character is exported as character + // entity. 
+ // coverity[callee_ptr_arith] - its ok + nLen = lcl_FlushContext(cBuffer, nFlags); + char *pBuffer = cBuffer; + while( nLen-- ) + aDest.append(*pBuffer++); + + aDest.append("&#" + OString::number(static_cast<sal_Int32>(c)) + // Unicode code points guaranteed to fit into sal_Int32 + + ";"); + if( pNonConvertableChars ) + { + OUString cs(&c, 1); + if( -1 == pNonConvertableChars->indexOf( cs ) ) + (*pNonConvertableChars) += cs; + } + } + } + return aDest.makeStringAndClear(); +} + +static OString lcl_FlushToAscii() +{ + OStringBuffer aDest; + + char cBuffer[TXTCONV_BUFFER_SIZE]; + const sal_uInt32 nFlags = RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE| + RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE| + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR| + RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR; + sal_Size nLen = lcl_FlushContext(cBuffer, nFlags); + char *pBuffer = cBuffer; + while( nLen-- ) + aDest.append(*pBuffer++); + return aDest.makeStringAndClear(); +} + +OString HTMLOutFuncs::ConvertStringToHTML( const OUString& rSrc, + OUString *pNonConvertableChars ) +{ + OStringBuffer aDest; + for( sal_Int32 i=0, nLen = rSrc.getLength(); i < nLen; ) + aDest.append(lcl_ConvertCharToHTML( + rSrc.iterateCodePoints(&i), pNonConvertableChars)); + aDest.append(lcl_FlushToAscii()); + return aDest.makeStringAndClear(); +} + +SvStream& HTMLOutFuncs::Out_AsciiTag( SvStream& rStream, std::string_view rStr, + bool bOn ) +{ + if(bOn) + rStream.WriteOString("<"); + else + rStream.WriteOString("</"); + + rStream.WriteOString(rStr).WriteChar('>'); + + return rStream; +} + +SvStream& HTMLOutFuncs::Out_Char( SvStream& rStream, sal_uInt32 c, + OUString *pNonConvertableChars ) +{ + OString sOut = lcl_ConvertCharToHTML( c, pNonConvertableChars ); + rStream.WriteOString( sOut ); + return rStream; +} + +SvStream& HTMLOutFuncs::Out_String( SvStream& rStream, const OUString& rOUStr, + OUString *pNonConvertableChars ) +{ + sal_Int32 nLen = rOUStr.getLength(); + for( sal_Int32 n = 0; n < nLen; ) + HTMLOutFuncs::Out_Char( 
rStream, rOUStr.iterateCodePoints(&n), + pNonConvertableChars ); + HTMLOutFuncs::FlushToAscii( rStream ); + return rStream; +} + +SvStream& HTMLOutFuncs::FlushToAscii( SvStream& rStream ) +{ + OString sOut = lcl_FlushToAscii(); + + if (!sOut.isEmpty()) + rStream.WriteOString( sOut ); + + return rStream; +} + +SvStream& HTMLOutFuncs::Out_Hex( SvStream& rStream, sal_uInt32 nHex, sal_uInt8 nLen ) +{ // out into a stream + char aNToABuf[] = "0000000000000000"; + + DBG_ASSERT( nLen < sizeof(aNToABuf), "too many places" ); + if( nLen>=sizeof(aNToABuf) ) + nLen = (sizeof(aNToABuf)-1); + + // set pointer to end of buffer + char *pStr = aNToABuf + (sizeof(aNToABuf)-1); + for( sal_uInt8 n = 0; n < nLen; ++n ) + { + *(--pStr) = static_cast<char>(nHex & 0xf ) + 48; + if( *pStr > '9' ) + *pStr += 39; + nHex >>= 4; + } + return rStream.WriteOString( pStr ); +} + + +SvStream& HTMLOutFuncs::Out_Color( SvStream& rStream, const Color& rColor, bool bXHTML ) +{ + rStream.WriteOString( "\"" ); + if (bXHTML) + rStream.WriteOString( "color: " ); + rStream.WriteOString( "#" ); + if( rColor == COL_AUTO ) + { + rStream.WriteOString( "000000" ); + } + else + { + Out_Hex( rStream, rColor.GetRed(), 2 ); + Out_Hex( rStream, rColor.GetGreen(), 2 ); + Out_Hex( rStream, rColor.GetBlue(), 2 ); + } + rStream.WriteChar( '\"' ); + + return rStream; +} + +SvStream& HTMLOutFuncs::Out_ImageMap( SvStream& rStream, + const OUString& rBaseURL, + const ImageMap& rIMap, + const OUString& rName, + const HTMLOutEvent *pEventTable, + bool bOutStarBasic, + const char *pDelim, + const char *pIndentArea, + const char *pIndentMap ) +{ + const OUString& rOutName = !rName.isEmpty() ? 
rName : rIMap.GetName(); + DBG_ASSERT( !rOutName.isEmpty(), "No ImageMap-Name" ); + if( rOutName.isEmpty() ) + return rStream; + + OStringBuffer sOut = + OString::Concat("<") + + OOO_STRING_SVTOOLS_HTML_map + " " + OOO_STRING_SVTOOLS_HTML_O_name + "=\""; + rStream.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStream, rOutName ); + rStream.WriteOString( "\">" ); + + for( size_t i=0; i<rIMap.GetIMapObjectCount(); i++ ) + { + const IMapObject* pObj = rIMap.GetIMapObject( i ); + DBG_ASSERT( pObj, "Where is the ImageMap-Object?" ); + + if( pObj ) + { + // Translate the object's geometry into a shape keyword (pShape) + // and a comma-separated coordinate list (aCoords). + // pShape stays nullptr for unknown object types, which skips the <area> output below. + const char *pShape = nullptr; + OString aCoords; + switch( pObj->GetType() ) + { + case IMapObjectType::Rectangle: + { + const IMapRectangleObject* pRectObj = + static_cast<const IMapRectangleObject *>(pObj); + pShape = OOO_STRING_SVTOOLS_HTML_SH_rect; + tools::Rectangle aRect( pRectObj->GetRectangle() ); + + // coords: left,top,right,bottom + aCoords = + OString::number(static_cast<sal_Int32>(aRect.Left())) + + "," + + OString::number(static_cast<sal_Int32>(aRect.Top())) + + "," + + OString::number(static_cast<sal_Int32>(aRect.Right())) + + "," + + OString::number(static_cast<sal_Int32>(aRect.Bottom())); + } + break; + case IMapObjectType::Circle: + { + const IMapCircleObject* pCirc = + static_cast<const IMapCircleObject *>(pObj); + pShape= OOO_STRING_SVTOOLS_HTML_SH_circ; + Point aCenter( pCirc->GetCenter() ); + tools::Long nOff = pCirc->GetRadius(); + + // coords: center-x,center-y,radius + aCoords = + OString::number(static_cast<sal_Int32>(aCenter.X())) + + "," + + OString::number(static_cast<sal_Int32>(aCenter.Y())) + + "," + + OString::number(static_cast<sal_Int32>(nOff)); + } + break; + case IMapObjectType::Polygon: + { + const IMapPolygonObject* pPolyObj = + static_cast<const IMapPolygonObject *>(pObj); + pShape= OOO_STRING_SVTOOLS_HTML_SH_poly; + tools::Polygon aPoly( pPolyObj->GetPolygon() ); + sal_uInt16 nCount = aPoly.GetSize(); + // coords: x0,y0,x1,y1,... - the first vertex is written without a leading comma + OString aTmpBuf; + if( nCount>0 ) + { + const Point& rPoint = aPoly[0]; + aTmpBuf = OString::number(static_cast<sal_Int32>(rPoint.X())) + 
"," + + OString::number(static_cast<sal_Int32>(rPoint.Y())); + } + // Append (not assign) the remaining vertices; plain assignment here + // would discard everything but the last point. + for( sal_uInt16 j=1; j<nCount; j++ ) + { + const Point& rPoint = aPoly[j]; + aTmpBuf += + "," + + OString::number(static_cast<sal_Int32>(rPoint.X())) + + "," + + OString::number(static_cast<sal_Int32>(rPoint.Y())); + } + aCoords = aTmpBuf; + } + break; + default: + DBG_ASSERT( pShape, "unknown IMapObject" ); + break; + } + + if( pShape ) + { + if( pDelim ) + rStream.WriteOString( pDelim ); + if( pIndentArea ) + rStream.WriteOString( pIndentArea ); + + sOut.append(OString::Concat("<") + OOO_STRING_SVTOOLS_HTML_area + " " OOO_STRING_SVTOOLS_HTML_O_shape + "=" + pShape + " " + OOO_STRING_SVTOOLS_HTML_O_coords "=\"" + + aCoords + "\" "); + rStream.WriteOString( sOut ); + sOut.setLength(0); + + OUString aURL( pObj->GetURL() ); + if( !aURL.isEmpty() && pObj->IsActive() ) + { + aURL = URIHelper::simpleNormalizedMakeRelative( + rBaseURL, aURL ); + sOut.append(OOO_STRING_SVTOOLS_HTML_O_href "=\""); + rStream.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStream, aURL ).WriteChar( '\"' ); + } + else + rStream.WriteOString( OOO_STRING_SVTOOLS_HTML_O_nohref ); + + const OUString& rObjName = pObj->GetName(); + if( !rObjName.isEmpty() ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_name "=\""); + rStream.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStream, rObjName ).WriteChar( '\"' ); + } + + const OUString& rTarget = pObj->GetTarget(); + if( !rTarget.isEmpty() && pObj->IsActive() ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_target "=\""); + rStream.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStream, rTarget ).WriteChar( '\"' ); + } + + OUString rDesc( pObj->GetAltText() ); + if( rDesc.isEmpty() ) + rDesc = pObj->GetDesc(); + + if( !rDesc.isEmpty() ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_alt "=\""); + rStream.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStream, rDesc ).WriteChar( '\"' ); + } + + const SvxMacroTableDtor& rMacroTab = 
pObj->GetMacroTable(); + if( pEventTable && !rMacroTab.empty() ) + Out_Events( rStream, rMacroTab, pEventTable, + bOutStarBasic ); + + rStream.WriteChar( '>' ); + } + } + + } + + if( pDelim ) + rStream.WriteOString( pDelim ); + if( pIndentMap ) + rStream.WriteOString( pIndentMap ); + Out_AsciiTag( rStream, OOO_STRING_SVTOOLS_HTML_map, false ); + + return rStream; +} + +SvStream& HTMLOutFuncs::OutScript( SvStream& rStrm, + const OUString& rBaseURL, + std::u16string_view rSource, + const OUString& rLanguage, + ScriptType eScriptType, + const OUString& rSrc, + const OUString *pSBLibrary, + const OUString *pSBModule ) +{ + // script is not indented! + OStringBuffer sOut("<" OOO_STRING_SVTOOLS_HTML_script); + + if( !rLanguage.isEmpty() ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_language "=\""); + rStrm.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStrm, rLanguage ); + sOut.append('\"'); + } + + if( !rSrc.isEmpty() ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_src "=\""); + rStrm.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStrm, URIHelper::simpleNormalizedMakeRelative(rBaseURL, rSrc) ); + sOut.append('\"'); + } + + if( STARBASIC != eScriptType && pSBLibrary ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_sdlibrary "=\""); + rStrm.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStrm, *pSBLibrary ); + sOut.append('\"'); + } + + if( STARBASIC != eScriptType && pSBModule ) + { + sOut.append(" " OOO_STRING_SVTOOLS_HTML_O_sdmodule "=\""); + rStrm.WriteOString( sOut ); + sOut.setLength(0); + Out_String( rStrm, *pSBModule ); + sOut.append('\"'); + } + + sOut.append('>'); + + rStrm.WriteOString( sOut ); + sOut.setLength(0); + + if( !rSource.empty() || pSBLibrary || pSBModule ) + { + rStrm.WriteOString( SAL_NEWLINE_STRING ); + + if( JAVASCRIPT != eScriptType ) + { + rStrm.WriteOString( "<!--" ) + .WriteOString( SAL_NEWLINE_STRING ); + } + + if( STARBASIC == eScriptType ) + { + if( pSBLibrary ) + { + sOut.append("' " 
OOO_STRING_SVTOOLS_HTML_SB_library " " + + OUStringToOString(*pSBLibrary, RTL_TEXTENCODING_UTF8)); + rStrm.WriteOString( sOut ).WriteOString( SAL_NEWLINE_STRING ); + sOut.setLength(0); + } + + if( pSBModule ) + { + sOut.append("' " OOO_STRING_SVTOOLS_HTML_SB_module " " + + OUStringToOString(*pSBModule, RTL_TEXTENCODING_UTF8)); + rStrm.WriteOString( sOut ).WriteOString( SAL_NEWLINE_STRING ); + sOut.setLength(0); + } + } + + if( !rSource.empty() ) + { + // we write the module in ANSI-charset, but with + // the system new line. + const OString sSource(OUStringToOString(rSource, RTL_TEXTENCODING_UTF8)); + rStrm.WriteOString( sSource ).WriteOString( SAL_NEWLINE_STRING ); + } + rStrm.WriteOString( SAL_NEWLINE_STRING ); + + if( JAVASCRIPT != eScriptType ) + { + // MIB/MM: if it is not StarBasic, a // could be wrong. + // As the comment is removed during reading, it is not helping us... + rStrm.WriteOString( STARBASIC == eScriptType ? "' -->" : "// -->" ) + .WriteOString( SAL_NEWLINE_STRING ); + } + } + + HTMLOutFuncs::Out_AsciiTag( rStrm, OOO_STRING_SVTOOLS_HTML_script, false ); + + return rStrm; +} + + +SvStream& HTMLOutFuncs::Out_Events( SvStream& rStrm, + const SvxMacroTableDtor& rMacroTable, + const HTMLOutEvent *pEventTable, + bool bOutStarBasic, + OUString *pNonConvertableChars ) +{ + sal_uInt16 i=0; + while( pEventTable[i].pBasicName || pEventTable[i].pJavaName ) + { + const SvxMacro *pMacro = + rMacroTable.Get( pEventTable[i].nEvent ); + + if( pMacro && pMacro->HasMacro() && + ( JAVASCRIPT == pMacro->GetScriptType() || bOutStarBasic )) + { + const char *pStr = STARBASIC == pMacro->GetScriptType() + ? 
pEventTable[i].pBasicName + : pEventTable[i].pJavaName; + + if( pStr ) + { + OString sOut = OString::Concat(" ") + pStr + "=\""; + rStrm.WriteOString( sOut ); + + Out_String( rStrm, pMacro->GetMacName(), pNonConvertableChars ).WriteChar( '\"' ); + } + } + i++; + } + + return rStrm; +} + +OString HTMLOutFuncs::CreateTableDataOptionsValNum( + bool bValue, + double fVal, sal_uInt32 nFormat, SvNumberFormatter& rFormatter, + OUString* pNonConvertableChars) +{ + OStringBuffer aStrTD; + + if ( bValue ) + { + // printf / scanf is not precise enough + OUString aValStr; + rFormatter.GetInputLineString( fVal, 0, aValStr ); + OString sTmp(OUStringToOString(aValStr, RTL_TEXTENCODING_UTF8)); + aStrTD.append(" " OOO_STRING_SVTOOLS_HTML_O_SDval "=\"" + + sTmp + "\""); + } + if ( bValue || nFormat ) + { + aStrTD.append(" " OOO_STRING_SVTOOLS_HTML_O_SDnum "=\"" + + OString::number(static_cast<sal_uInt16>( + Application::GetSettings().GetLanguageTag().getLanguageType())) + + ";"); // Language for Format 0 + if ( nFormat ) + { + OString aNumStr; + LanguageType nLang; + const SvNumberformat* pFormatEntry = rFormatter.GetEntry( nFormat ); + if ( pFormatEntry ) + { + aNumStr = ConvertStringToHTML( pFormatEntry->GetFormatstring(), + pNonConvertableChars ); + nLang = pFormatEntry->GetLanguage(); + } + else + nLang = LANGUAGE_SYSTEM; + aStrTD.append( + OString::number(static_cast<sal_Int32>(static_cast<sal_uInt16>(nLang))) + + ";" + + aNumStr); + } + aStrTD.append('\"'); + } + return aStrTD.makeStringAndClear(); +} + +bool HTMLOutFuncs::PrivateURLToInternalImg( OUString& rURL ) +{ + if( rURL.startsWith(OOO_STRING_SVTOOLS_HTML_private_image) ) + { + rURL = rURL.copy( strlen(OOO_STRING_SVTOOLS_HTML_private_image) ); + return true; + } + + return false; +} + +void HtmlWriterHelper::applyColor(HtmlWriter& rHtmlWriter, std::string_view aAttributeName, const Color& rColor) +{ + OStringBuffer sBuffer; + + if( rColor == COL_AUTO ) + { + sBuffer.append("#000000"); + } + else + { + 
sBuffer.append('#'); + std::ostringstream sStringStream; + sStringStream + << std::right + << std::setfill('0') + << std::setw(6) + << std::hex + << sal_uInt32(rColor.GetRGBColor()); + sBuffer.append(sStringStream.str().c_str()); + } + + rHtmlWriter.attribute(aAttributeName, sBuffer); +} + + +void HtmlWriterHelper::applyEvents(HtmlWriter& rHtmlWriter, const SvxMacroTableDtor& rMacroTable, const HTMLOutEvent* pEventTable, bool bOutStarBasic) +{ + sal_uInt16 i = 0; + while (pEventTable[i].pBasicName || pEventTable[i].pJavaName) + { + const SvxMacro* pMacro = rMacroTable.Get(pEventTable[i].nEvent); + + if (pMacro && pMacro->HasMacro() && (JAVASCRIPT == pMacro->GetScriptType() || bOutStarBasic)) + { + const char* pAttributeName = nullptr; + if (STARBASIC == pMacro->GetScriptType()) + pAttributeName = pEventTable[i].pBasicName; + else + pAttributeName = pEventTable[i].pJavaName; + + if (pAttributeName) + { + rHtmlWriter.attribute(pAttributeName, OUStringToOString(pMacro->GetMacName(), RTL_TEXTENCODING_UTF8)); + } + } + i++; + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/svtools/source/svhtml/htmlsupp.cxx b/svtools/source/svhtml/htmlsupp.cxx new file mode 100644 index 0000000000..a418d61eb7 --- /dev/null +++ b/svtools/source/svhtml/htmlsupp.cxx @@ -0,0 +1,159 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <comphelper/string.hxx> +#include <svtools/parhtml.hxx> +#include <svtools/htmltokn.h> +#include <svtools/htmlkywd.hxx> +#include <tools/urlobj.hxx> + +// Table for converting option values into strings +HTMLOptionEnum<HTMLScriptLanguage> const aScriptLangOptEnums[] = +{ + { OOO_STRING_SVTOOLS_HTML_LG_starbasic, HTMLScriptLanguage::StarBasic }, + { OOO_STRING_SVTOOLS_HTML_LG_javascript, HTMLScriptLanguage::JavaScript }, + { OOO_STRING_SVTOOLS_HTML_LG_javascript11, HTMLScriptLanguage::JavaScript }, + { OOO_STRING_SVTOOLS_HTML_LG_livescript, HTMLScriptLanguage::JavaScript }, + { nullptr, HTMLScriptLanguage(0) } +}; + +void HTMLParser::ParseScriptOptions( OUString& rLangString, std::u16string_view rBaseURL, + HTMLScriptLanguage& rLang, + OUString& rSrc, + OUString& rLibrary, + OUString& rModule ) +{ + const HTMLOptions& aScriptOptions = GetOptions(); + + rLangString.clear(); + rLang = HTMLScriptLanguage::JavaScript; + rSrc.clear(); + rLibrary.clear(); + rModule.clear(); + + for( size_t i = aScriptOptions.size(); i; ) + { + const HTMLOption& aOption = aScriptOptions[--i]; + switch( aOption.GetToken() ) + { + case HtmlOptionId::LANGUAGE: + { + rLangString = aOption.GetString(); + HTMLScriptLanguage nLang; + if( aOption.GetEnum( nLang, aScriptLangOptEnums ) ) + rLang = nLang; + else + rLang = HTMLScriptLanguage::Unknown; + } + break; + + case HtmlOptionId::SRC: + rSrc = INetURLObject::GetAbsURL( rBaseURL, aOption.GetString() ); + break; + case HtmlOptionId::SDLIBRARY: + rLibrary = aOption.GetString(); + break; + + case HtmlOptionId::SDMODULE: + rModule = aOption.GetString(); + break; + default: break; + } + } +} + +void HTMLParser::RemoveSGMLComment( OUString &rString ) +{ + sal_Unicode c = 0; + sal_Int32 
idx = 0; + while (idx < rString.getLength()) + { + c = rString[idx]; + if (!( c==' ' || c=='\t' || c=='\r' || c=='\n' ) ) + break; + idx++; + } + if (idx) + rString = rString.copy( idx ); + + idx = rString.getLength() - 1; + while (idx > 0) + // Can never get to 0 because that would mean the string contains only whitespace, and the first + // loop would already have removed all of those. + { + c = rString[idx]; + if (!( c==' ' || c=='\t' || c=='\r' || c=='\n' ) ) + break; + idx--; + } + if (idx != rString.getLength() - 1) + rString = rString.copy( 0, idx + 1 ); + + // remove SGML comments + if( rString.startsWith( "<!--" ) ) + { + // the whole line + sal_Int32 nPos = 4; + while( nPos < rString.getLength() ) + { + c = rString[nPos]; + if (c == '\r' || c == '\n') + break; + ++nPos; + } + if( c == '\r' && nPos+1 < rString.getLength() && + '\n' == rString[nPos+1] ) + ++nPos; + else if( c != '\n' ) + nPos = 3; + ++nPos; + rString = rString.copy( nPos ); + } + + if( !rString.endsWith("-->") ) + return; + + rString = rString.copy( 0, rString.getLength()-3 ); + // "//" or "'", maybe preceding CR/LF + rString = comphelper::string::stripEnd(rString, ' '); + sal_Int32 nDel = 0, nLen = rString.getLength(); + if( nLen >= 2 && + rString.endsWith("//") ) + { + nDel = 2; + } + else if( nLen && '\'' == rString[nLen-1] ) + { + nDel = 1; + } + if( nDel && nLen >= nDel+1 ) + { + c = rString[nLen-(nDel+1)]; + if( '\r'==c || '\n'==c ) + { + nDel++; + if( '\n'==c && nLen >= nDel+1 && + '\r'==rString[nLen-(nDel+1)] ) + nDel++; + } + } + rString = rString.copy( 0, nLen-nDel ); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx new file mode 100644 index 0000000000..1f87456744 --- /dev/null +++ b/svtools/source/svhtml/parhtml.cxx @@ -0,0 +1,2213 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <comphelper/string.hxx> +#include <o3tl/safeint.hxx> +#include <o3tl/string_view.hxx> +#include <tools/stream.hxx> +#include <tools/debug.hxx> +#include <tools/color.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/character.hxx> +#include <rtl/tencinfo.h> +#include <sal/log.hxx> +#include <tools/tenccvt.hxx> +#include <tools/datetime.hxx> +#include <unotools/datetime.hxx> +#include <svl/inettype.hxx> +#include <svl/lngmisc.hxx> +#include <com/sun/star/beans/PropertyAttribute.hpp> +#include <com/sun/star/document/XDocumentProperties.hpp> + +#include <svtools/parhtml.hxx> +#include <svtools/htmltokn.h> +#include <svtools/htmlkywd.hxx> + +#include <utility> + +using namespace ::com::sun::star; + + +const sal_Int32 MAX_LEN( 1024 ); + +const sal_Int32 MAX_ENTITY_LEN( 8 ); + + +// Tables to convert option values into strings + +// <INPUT TYPE=xxx> +HTMLOptionEnum<HTMLInputType> const aInputTypeOptEnums[] = +{ + { OOO_STRING_SVTOOLS_HTML_IT_text, HTMLInputType::Text }, + { OOO_STRING_SVTOOLS_HTML_IT_password, HTMLInputType::Password }, + { OOO_STRING_SVTOOLS_HTML_IT_checkbox, HTMLInputType::Checkbox }, + { OOO_STRING_SVTOOLS_HTML_IT_radio, HTMLInputType::Radio }, + { OOO_STRING_SVTOOLS_HTML_IT_range, 
HTMLInputType::Range }, + { OOO_STRING_SVTOOLS_HTML_IT_scribble, HTMLInputType::Scribble }, + { OOO_STRING_SVTOOLS_HTML_IT_file, HTMLInputType::File }, + { OOO_STRING_SVTOOLS_HTML_IT_hidden, HTMLInputType::Hidden }, + { OOO_STRING_SVTOOLS_HTML_IT_submit, HTMLInputType::Submit }, + { OOO_STRING_SVTOOLS_HTML_IT_image, HTMLInputType::Image }, + { OOO_STRING_SVTOOLS_HTML_IT_reset, HTMLInputType::Reset }, + { OOO_STRING_SVTOOLS_HTML_IT_button, HTMLInputType::Button }, + { nullptr, HTMLInputType(0) } +}; + +// <TABLE FRAME=xxx> +HTMLOptionEnum<HTMLTableFrame> const aTableFrameOptEnums[] = +{ + { OOO_STRING_SVTOOLS_HTML_TF_void, HTMLTableFrame::Void }, + { OOO_STRING_SVTOOLS_HTML_TF_above, HTMLTableFrame::Above }, + { OOO_STRING_SVTOOLS_HTML_TF_below, HTMLTableFrame::Below }, + { OOO_STRING_SVTOOLS_HTML_TF_hsides, HTMLTableFrame::HSides }, + { OOO_STRING_SVTOOLS_HTML_TF_lhs, HTMLTableFrame::LHS }, + { OOO_STRING_SVTOOLS_HTML_TF_rhs, HTMLTableFrame::RHS }, + { OOO_STRING_SVTOOLS_HTML_TF_vsides, HTMLTableFrame::VSides }, + { OOO_STRING_SVTOOLS_HTML_TF_box, HTMLTableFrame::Box }, + { OOO_STRING_SVTOOLS_HTML_TF_border, HTMLTableFrame::Box }, + { nullptr, HTMLTableFrame(0) } +}; + +// <TABLE RULES=xxx> +HTMLOptionEnum<HTMLTableRules> const aTableRulesOptEnums[] = +{ + { OOO_STRING_SVTOOLS_HTML_TR_none, HTMLTableRules::NONE }, + { OOO_STRING_SVTOOLS_HTML_TR_groups, HTMLTableRules::Groups }, + { OOO_STRING_SVTOOLS_HTML_TR_rows, HTMLTableRules::Rows }, + { OOO_STRING_SVTOOLS_HTML_TR_cols, HTMLTableRules::Cols }, + { OOO_STRING_SVTOOLS_HTML_TR_all, HTMLTableRules::All }, + { nullptr, HTMLTableRules(0) } +}; + + +HTMLOption::HTMLOption( HtmlOptionId nTok, OUString _aToken, + OUString _aValue ) + : aValue(std::move(_aValue)) + , aToken(std::move(_aToken)) + , nToken( nTok ) +{ + DBG_ASSERT( nToken>=HtmlOptionId::BOOL_START && nToken<HtmlOptionId::END, + "HTMLOption: unknown token" ); +} + +sal_uInt32 HTMLOption::GetNumber() const +{ + DBG_ASSERT( (nToken>=HtmlOptionId::NUMBER_START 
&& + nToken<HtmlOptionId::NUMBER_END) || + (nToken>=HtmlOptionId::CONTEXT_START && + nToken<HtmlOptionId::CONTEXT_END) || + nToken==HtmlOptionId::VALUE, + "GetNumber: Option not numerical" ); + OUString aTmp(comphelper::string::stripStart(aValue, ' ')); + sal_Int32 nTmp = aTmp.toInt32(); + return nTmp >= 0 ? static_cast<sal_uInt32>(nTmp) : 0; +} + +sal_Int32 HTMLOption::GetSNumber() const +{ + DBG_ASSERT( (nToken>=HtmlOptionId::NUMBER_START && nToken<HtmlOptionId::NUMBER_END) || + (nToken>=HtmlOptionId::CONTEXT_START && nToken<HtmlOptionId::CONTEXT_END), + "GetSNumber: Option not numerical" ); + OUString aTmp(comphelper::string::stripStart(aValue, ' ')); + return aTmp.toInt32(); +} + +void HTMLOption::GetNumbers( std::vector<sal_uInt32> &rNumbers ) const +{ + rNumbers.clear(); + + // This is a very simplified scanner: it only searches all + // numerals in the string. + bool bInNum = false; + sal_uInt32 nNum = 0; + for( sal_Int32 i=0; i<aValue.getLength(); i++ ) + { + sal_Unicode c = aValue[ i ]; + if( c>='0' && c<='9' ) + { + nNum *= 10; + nNum += (c - '0'); + bInNum = true; + } + else if( bInNum ) + { + rNumbers.push_back( nNum ); + bInNum = false; + nNum = 0; + } + } + if( bInNum ) + { + rNumbers.push_back( nNum ); + } +} + +void HTMLOption::GetColor( Color& rColor ) const +{ + DBG_ASSERT( (nToken>=HtmlOptionId::COLOR_START && nToken<HtmlOptionId::COLOR_END) || nToken==HtmlOptionId::SIZE, + "GetColor: Option is not a color." ); + + OUString aTmp(aValue.toAsciiLowerCase()); + sal_uInt32 nColor = SAL_MAX_UINT32; + if (!aTmp.isEmpty() && aTmp[0] != '#') + nColor = GetHTMLColor(aTmp); + + if( SAL_MAX_UINT32 == nColor ) + { + nColor = 0; + sal_Int32 nPos = 0; + for (sal_uInt32 i=0; i<6; ++i) + { + // Whatever Netscape does to get color values, + // at maximum three characters < '0' are ignored. + sal_Unicode c = nPos<aTmp.getLength() ? aTmp[ nPos++ ] : '0'; + if( c < '0' ) + { + c = nPos<aTmp.getLength() ? 
aTmp[nPos++] : '0'; + if( c < '0' ) + c = nPos<aTmp.getLength() ? aTmp[nPos++] : '0'; + } + nColor *= 16; + if( c >= '0' && c <= '9' ) + nColor += (c - '0'); + else if( c >= 'a' && c <= 'f' ) + nColor += (c + 0xa - 'a'); + } + } + + rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000) >> 16) ); + rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00) >> 8)); + rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ff) ); +} + +HTMLInputType HTMLOption::GetInputType() const +{ + DBG_ASSERT( nToken==HtmlOptionId::TYPE, "GetInputType: Option not TYPE" ); + return GetEnum( aInputTypeOptEnums, HTMLInputType::Text ); +} + +HTMLTableFrame HTMLOption::GetTableFrame() const +{ + DBG_ASSERT( nToken==HtmlOptionId::FRAME, "GetTableFrame: Option not FRAME" ); + return GetEnum( aTableFrameOptEnums ); +} + +HTMLTableRules HTMLOption::GetTableRules() const +{ + DBG_ASSERT( nToken==HtmlOptionId::RULES, "GetTableRules: Option not RULES" ); + return GetEnum( aTableRulesOptEnums ); +} + +HTMLParser::HTMLParser( SvStream& rIn, bool bReadNewDoc ) : + SvParser<HtmlTokenId>( rIn ), + bNewDoc(bReadNewDoc), + bIsInHeader(true), + bReadListing(false), + bReadXMP(false), + bReadPRE(false), + bReadTextArea(false), + bReadScript(false), + bReadStyle(false), + bEndTokenFound(false), + bPre_IgnoreNewPara(false), + bReadNextChar(false), + bReadComment(false), + nPre_LinePos(0), + mnPendingOffToken(HtmlTokenId::NONE) +{ + //#i76649, default to UTF-8 for HTML unless we know differently + SetSrcEncoding(RTL_TEXTENCODING_UTF8); +} + +HTMLParser::~HTMLParser() +{ +} + +void HTMLParser::SetNamespace(std::u16string_view rNamespace) +{ + // Convert namespace alias to a prefix. 
+ maNamespace = OUString::Concat(rNamespace) + ":"; +} + +namespace +{ + class RefGuard + { + private: + HTMLParser& m_rParser; + public: + RefGuard(HTMLParser& rParser) + : m_rParser(rParser) + { + m_rParser.AddFirstRef(); + } + + ~RefGuard() + { + if (m_rParser.GetStatus() != SvParserState::Pending) + m_rParser.ReleaseRef(); // Parser not needed anymore + } + }; +} + +SvParserState HTMLParser::CallParser() +{ + eState = SvParserState::Working; + nNextCh = GetNextChar(); + SaveState( HtmlTokenId::NONE ); + + nPre_LinePos = 0; + bPre_IgnoreNewPara = false; + + RefGuard aRefGuard(*this); + + Continue( HtmlTokenId::NONE ); + + return eState; +} + +void HTMLParser::Continue( HtmlTokenId nToken ) +{ + if( nToken == HtmlTokenId::NONE ) + nToken = GetNextToken(); + + while( IsParserWorking() ) + { + SaveState( nToken ); + nToken = FilterToken( nToken ); + + if( nToken != HtmlTokenId::NONE ) + NextToken( nToken ); + + if( IsParserWorking() ) + SaveState( HtmlTokenId::NONE ); // continue with new token + + nToken = GetNextToken(); + } +} + +HtmlTokenId HTMLParser::FilterToken( HtmlTokenId nToken ) +{ + switch( nToken ) + { + case HtmlTokenId(EOF): + nToken = HtmlTokenId::NONE; + break; // don't pass + + case HtmlTokenId::HEAD_OFF: + bIsInHeader = false; + break; + + case HtmlTokenId::HEAD_ON: + bIsInHeader = true; + break; + + case HtmlTokenId::BODY_ON: + bIsInHeader = false; + break; + + case HtmlTokenId::FRAMESET_ON: + bIsInHeader = false; + break; + + case HtmlTokenId::BODY_OFF: + bReadPRE = bReadListing = bReadXMP = false; + break; + + case HtmlTokenId::HTML_OFF: + nToken = HtmlTokenId::NONE; + bReadPRE = bReadListing = bReadXMP = false; + break; // HtmlTokenId::ON hasn't been passed either ! 
+ + case HtmlTokenId::PREFORMTXT_ON: + StartPRE(); + break; + + case HtmlTokenId::PREFORMTXT_OFF: + FinishPRE(); + break; + + case HtmlTokenId::LISTING_ON: + StartListing(); + break; + + case HtmlTokenId::LISTING_OFF: + FinishListing(); + break; + + case HtmlTokenId::XMP_ON: + StartXMP(); + break; + + case HtmlTokenId::XMP_OFF: + FinishXMP(); + break; + + default: + if( bReadPRE ) + nToken = FilterPRE( nToken ); + else if( bReadListing ) + nToken = FilterListing( nToken ); + else if( bReadXMP ) + nToken = FilterXMP( nToken ); + + break; + } + + return nToken; +} + +namespace { + +constexpr bool HTML_ISPRINTABLE(sal_Unicode c) { return c >= 32 && c != 127; } + +constexpr bool HTML_ISSPACE(sal_uInt32 c) +{ + return ' ' == c || '\t' == c || '\r' == c || '\n' == c || '\x0b' == c; +} + +} + +HtmlTokenId HTMLParser::ScanText(const sal_Unicode cBreak) +{ + OUStringBuffer sTmpBuffer( MAX_LEN ); + bool bContinue = true; + bool bEqSignFound = false; + sal_uInt32 cQuote = 0U; + + while( bContinue && IsParserWorking() ) + { + bool bNextCh = true; + switch( nNextCh ) + { + case '&': + bEqSignFound = false; + if( bReadXMP ) + sTmpBuffer.append( '&' ); + else + { + sal_uInt64 nStreamPos = rInput.Tell(); + sal_uInt32 nLinePos = GetLinePos(); + + sal_uInt32 cChar = 0U; + if( '#' == (nNextCh = GetNextChar()) ) + { + nNextCh = GetNextChar(); + const bool bIsHex( 'x' == nNextCh ); + const bool bIsDecOrHex( bIsHex || rtl::isAsciiDigit(nNextCh) ); + if ( bIsDecOrHex ) + { + if ( bIsHex ) + { + nNextCh = GetNextChar(); + while ( rtl::isAsciiHexDigit(nNextCh) ) + { + cChar = cChar * 16U + + ( nNextCh <= '9' + ? sal_uInt32( nNextCh - '0' ) + : ( nNextCh <= 'F' + ? 
sal_uInt32( nNextCh - 'A' + 10 ) + : sal_uInt32( nNextCh - 'a' + 10 ) ) ); + nNextCh = GetNextChar(); + } + } + else + { + do + { + cChar = cChar * 10U + sal_uInt32( nNextCh - '0'); + nNextCh = GetNextChar(); + } + while( rtl::isAsciiDigit(nNextCh) ); + } + + if( RTL_TEXTENCODING_DONTKNOW != eSrcEnc && + RTL_TEXTENCODING_UCS2 != eSrcEnc && + RTL_TEXTENCODING_UTF8 != eSrcEnc && + cChar < 256 ) + { + const sal_uInt32 convertFlags = + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | + RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | + RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT; + + char cEncodedChar = static_cast<char>(cChar); + cChar = OUString(&cEncodedChar, 1, eSrcEnc, convertFlags).toChar(); + if( 0U == cChar ) + { + // If the character could not be + // converted, because a conversion is not + // available, do no conversion at all. + cChar = cEncodedChar; + } + } + } + else + nNextCh = 0U; + + if (!rtl::isUnicodeCodePoint(cChar) + || (linguistic::IsControlChar(cChar) + && cChar != '\r' && cChar != '\n' && cChar != '\t')) + { + cChar = '?'; + } + } + else if( rtl::isAsciiAlpha( nNextCh ) ) + { + OUStringBuffer sEntityBuffer( MAX_ENTITY_LEN ); + sal_Int32 nPos = 0; + do + { + sEntityBuffer.appendUtf32( nNextCh ); + nPos++; + nNextCh = GetNextChar(); + } + while( nPos < MAX_ENTITY_LEN && rtl::isAsciiAlphanumeric( nNextCh ) && + !rInput.eof() ); + + if( IsParserWorking() && !rInput.eof() ) + { + std::u16string_view sEntity(sEntityBuffer.subView(0, nPos)); + cChar = GetHTMLCharName( sEntity ); + + // not found ( == 0 ): plain text + // or a character which is inserted as attribute + if( 0U == cChar && ';' != nNextCh ) + { + DBG_ASSERT( rInput.Tell() - nStreamPos == + static_cast<sal_uInt64>(nPos+1)*GetCharSize(), + "UTF-8 is failing here" ); + for( sal_Int32 i = nPos-1; i>1; i-- ) + { + nNextCh = sEntityBuffer[i]; + sEntityBuffer.setLength( i ); + sEntity = sEntityBuffer.subView(0, i); + cChar = GetHTMLCharName( sEntity ); + if( cChar ) + { + rInput.SeekRel( 
-static_cast<sal_Int64> + (nPos-i)*GetCharSize() ); + nlLinePos -= sal_uInt32(nPos-i); + nPos = i; + ClearTxtConvContext(); + break; + } + } + } + + if( !cChar ) // unknown character? + { + // back in stream, insert '&' + // and restart with next character + sTmpBuffer.append( '&' ); + + DBG_ASSERT( rInput.Tell()-nStreamPos == + static_cast<sal_uInt64>(nPos+1)*GetCharSize(), + "Wrong stream position" ); + DBG_ASSERT( nlLinePos-nLinePos == + static_cast<sal_uInt32>(nPos+1), + "Wrong line position" ); + rInput.Seek( nStreamPos ); + nlLinePos = nLinePos; + ClearTxtConvContext(); + break; + } + + assert(cChar != 0); + + // 1 == Non Breaking Space + // 2 == SoftHyphen + + if (cChar == 1 || cChar == 2) + { + if( '>' == cBreak ) + { + // When reading the content of a tag we have + // to change it to ' ' or '-' + if( 1U == cChar ) + cChar = ' '; + else //2U + cChar = '-'; + } + else + { + // If not scanning a tag return token + aToken.append( sTmpBuffer ); + sTmpBuffer.setLength(0); + + if( !aToken.isEmpty() ) + { + // restart with character + nNextCh = '&'; + DBG_ASSERT( rInput.Tell()-nStreamPos == + static_cast<sal_uInt64>(nPos+1)*GetCharSize(), + "Wrong stream position" ); + DBG_ASSERT( nlLinePos-nLinePos == + static_cast<sal_uInt32>(nPos+1), + "Wrong line position" ); + rInput.Seek( nStreamPos ); + nlLinePos = nLinePos; + ClearTxtConvContext(); + return HtmlTokenId::TEXTTOKEN; + } + + // Hack: _GetNextChar shall not read the + // next character + if( ';' != nNextCh ) + aToken.append( " " ); + if( 1U == cChar ) + return HtmlTokenId::NONBREAKSPACE; + else //2U + return HtmlTokenId::SOFTHYPH; + } + } + } + else + nNextCh = 0U; + } + // &{...};-JavaScript-Macros are not supported any longer. 
+ else if( IsParserWorking() ) + { + sTmpBuffer.append( '&' ); + bNextCh = false; + break; + } + + bNextCh = (';' == nNextCh); + if( cBreak=='>' && (cChar=='\\' || cChar=='\'' || + cChar=='\"' || cChar==' ') ) + { + // ' and " have to be escaped within tags to separate + // them from ' and " enclosing options. + // \ has to be escaped as well. + // Space is protected because it's not a delimiter between + // options. + sTmpBuffer.append( '\\' ); + } + if( IsParserWorking() ) + { + if( cChar ) + sTmpBuffer.appendUtf32( cChar ); + } + else if( SvParserState::Pending==eState && '>'!=cBreak ) + { + // Restart with '&', the remainder is returned as + // text token. + if( !aToken.isEmpty() || !sTmpBuffer.isEmpty() ) + { + // _GetNextChar() returns the previous text and + // during the next execution a new character is read. + // Thus we have to position in front of the '&'. + nNextCh = 0U; + rInput.Seek( nStreamPos - GetCharSize() ); + nlLinePos = nLinePos-1; + ClearTxtConvContext(); + bReadNextChar = true; + } + bNextCh = false; + } + } + break; + case '=': + if( '>'==cBreak && !cQuote ) + bEqSignFound = true; + sTmpBuffer.appendUtf32( nNextCh ); + break; + + case '\\': + if( '>'==cBreak ) + { + // mark within tags + sTmpBuffer.append( '\\' ); + } + sTmpBuffer.append( '\\' ); + break; + + case '\"': + case '\'': + if( '>'==cBreak ) + { + if( bEqSignFound ) + cQuote = nNextCh; + else if( cQuote && (cQuote==nNextCh ) ) + cQuote = 0U; + } + sTmpBuffer.appendUtf32( nNextCh ); + bEqSignFound = false; + break; + + case sal_Unicode(EOF): + if( rInput.eof() ) + { + bContinue = false; + } + // else: ignore, not a valid code point + break; + + case '<': + bEqSignFound = false; + if( '>'==cBreak ) + sTmpBuffer.appendUtf32( nNextCh ); + else + bContinue = false; // break, string is together + break; + + case '\f': + if( '>' == cBreak ) + { + // If scanning options treat it like a space, ... + sTmpBuffer.append( ' ' ); + } + else + { + // otherwise it's a separate token. 
+ bContinue = false; + } + break; + + case '\r': + case '\n': + if( '>'==cBreak ) + { + // cr/lf in tag is handled in GetNextToken_() + sTmpBuffer.appendUtf32( nNextCh ); + break; + } + else if( bReadListing || bReadXMP || bReadPRE || bReadTextArea ) + { + bContinue = false; + break; + } + // Reduce sequence of CR/LF/BLANK/TAB to a single blank + [[fallthrough]]; + case '\t': + if( '\t'==nNextCh && bReadPRE && '>'!=cBreak ) + { + // Pass Tabs up in <PRE> + bContinue = false; + break; + } + [[fallthrough]]; + case '\x0b': + if( '\x0b'==nNextCh && (bReadPRE || bReadXMP ||bReadListing) && + '>'!=cBreak ) + { + break; + } + if (!m_bPreserveSpaces) + nNextCh = ' '; + [[fallthrough]]; + case ' ': + if (!m_bPreserveSpaces) + { + sTmpBuffer.appendUtf32(nNextCh); + if ('>' != cBreak && (!bReadListing && !bReadXMP && !bReadPRE && !bReadTextArea)) + { + // Reduce sequences of Blanks/Tabs/CR/LF to a single blank + do + { + nNextCh = GetNextChar(); + if (sal_Unicode(EOF) == nNextCh && rInput.eof()) + { + if (!aToken.isEmpty() || sTmpBuffer.getLength() > 1) + { + // Have seen s.th. aside from blanks? + aToken.append(sTmpBuffer); + sTmpBuffer.setLength(0); + return HtmlTokenId::TEXTTOKEN; + } + else + // Only read blanks: no text must be returned + // and GetNextToken_ has to read until EOF + return HtmlTokenId::NONE; + } + } while (HTML_ISSPACE(nNextCh)); + bNextCh = false; + } + break; + } + [[fallthrough]]; + default: + bEqSignFound = false; + if (nNextCh == cBreak && !cQuote) + bContinue = false; + else + { + do { + if (!linguistic::IsControlChar(nNextCh) || HTML_ISSPACE(nNextCh)) + { + // All remaining characters make their way into the text. 
+ sTmpBuffer.appendUtf32( nNextCh ); + } + + nNextCh = GetNextChar(); + if( ( sal_Unicode(EOF) == nNextCh && rInput.eof() ) || + !IsParserWorking() ) + { + if( !sTmpBuffer.isEmpty() ) + aToken.append( sTmpBuffer ); + return HtmlTokenId::TEXTTOKEN; + } + } while( rtl::isAsciiAlpha( nNextCh ) || rtl::isAsciiDigit( nNextCh ) ); + bNextCh = false; + } + } + + if( bContinue && bNextCh ) + nNextCh = GetNextChar(); + } + + if( !sTmpBuffer.isEmpty() ) + aToken.append( sTmpBuffer ); + + return HtmlTokenId::TEXTTOKEN; +} + +HtmlTokenId HTMLParser::GetNextRawToken() +{ + OUStringBuffer sTmpBuffer( MAX_LEN ); + + if( bEndTokenFound ) + { + // During the last execution we already found the end token, + // thus we don't have to search it again. + bReadScript = false; + bReadStyle = false; + aEndToken.clear(); + bEndTokenFound = false; + + return HtmlTokenId::NONE; + } + + // Default return value: HtmlTokenId::RAWDATA + bool bContinue = true; + HtmlTokenId nToken = HtmlTokenId::RAWDATA; + SaveState( HtmlTokenId::NONE ); + while( bContinue && IsParserWorking() ) + { + bool bNextCh = true; + switch( nNextCh ) + { + case '<': + { + // Maybe we've reached the end. + + // Save what we have read previously... + aToken.append( sTmpBuffer ); + sTmpBuffer.setLength(0); + + // and remember position in stream. + sal_uInt64 nStreamPos = rInput.Tell(); + sal_uInt32 nLineNr = GetLineNr(); + sal_uInt32 nLinePos = GetLinePos(); + + // Start of an end token? + bool bOffState = false; + if( '/' == (nNextCh = GetNextChar()) ) + { + bOffState = true; + nNextCh = GetNextChar(); + } + else if( '!' 
== nNextCh ) + { + sTmpBuffer.appendUtf32( nNextCh ); + nNextCh = GetNextChar(); + } + + // Read following letters + while( (rtl::isAsciiAlpha(nNextCh) || '-'==nNextCh) && + IsParserWorking() && sTmpBuffer.getLength() < MAX_LEN ) + { + sTmpBuffer.appendUtf32( nNextCh ); + nNextCh = GetNextChar(); + } + + OUString aTok( sTmpBuffer.toString() ); + aTok = aTok.toAsciiLowerCase(); + bool bDone = false; + if( bReadScript || !aEndToken.isEmpty() ) + { + if( !bReadComment ) + { + if( aTok.startsWith( OOO_STRING_SVTOOLS_HTML_comment ) ) + { + bReadComment = true; + } + else + { + // A script has to end with "</SCRIPT>". But + // ">" is optional for security reasons + bDone = bOffState && + ( bReadScript + ? aTok == OOO_STRING_SVTOOLS_HTML_script + : aTok == aEndToken ); + } + } + if( bReadComment && '>'==nNextCh && aTok.endsWith( "--" ) ) + { + // End of comment of style <!-----> + bReadComment = false; + } + } + else + { + // Style sheets can be closed by </STYLE>, </HEAD> or <BODY> + if( bOffState ) + bDone = aTok == OOO_STRING_SVTOOLS_HTML_style || + aTok == OOO_STRING_SVTOOLS_HTML_head; + else + bDone = aTok == OOO_STRING_SVTOOLS_HTML_body; + } + + if( bDone ) + { + // Done! Return the previously read string (if requested) + // and continue. + + bContinue = false; + + // nToken==0 means, GetNextToken_ continues to read + if( aToken.isEmpty() && (bReadStyle || bReadScript) ) + { + // Immediately close environment (or context?) + // and parse the end token + bReadScript = false; + bReadStyle = false; + aEndToken.clear(); + nToken = HtmlTokenId::NONE; + } + else + { + // Keep bReadScript/bReadStyle alive + // and parse end token during next execution + bEndTokenFound = true; + } + + // Move backwards in stream to '<' + rInput.Seek( nStreamPos ); + SetLineNr( nLineNr ); + SetLinePos( nLinePos ); + ClearTxtConvContext(); + nNextCh = '<'; + + // Don't append string to token. 
+ sTmpBuffer.setLength( 0 ); + } + else + { + // remember "</" , everything else we find in the buffer + aToken.append( "<" ); + if( bOffState ) + aToken.append( "/" ); + + bNextCh = false; + } + } + break; + case '-': + sTmpBuffer.appendUtf32( nNextCh ); + if( bReadComment ) + { + bool bTwoMinus = false; + nNextCh = GetNextChar(); + while( '-' == nNextCh && IsParserWorking() ) + { + bTwoMinus = true; + sTmpBuffer.appendUtf32( nNextCh ); + nNextCh = GetNextChar(); + } + + if( '>' == nNextCh && IsParserWorking() && bTwoMinus ) + bReadComment = false; + + bNextCh = false; + } + break; + + case '\r': + // \r\n? closes the current text token (even if it's empty) + nNextCh = GetNextChar(); + if( nNextCh=='\n' ) + nNextCh = GetNextChar(); + bContinue = false; + break; + case '\n': + // \n closes the current text token (even if it's empty) + nNextCh = GetNextChar(); + bContinue = false; + break; + case sal_Unicode(EOF): + // eof closes the current text token and behaves like having read + // an end token + if( rInput.eof() ) + { + bContinue = false; + if( !aToken.isEmpty() || !sTmpBuffer.isEmpty() ) + { + bEndTokenFound = true; + } + else + { + bReadScript = false; + bReadStyle = false; + aEndToken.clear(); + nToken = HtmlTokenId::NONE; + } + } + break; + default: + if (!linguistic::IsControlChar(nNextCh) || nNextCh == '\t') + { + // all remaining characters are appended to the buffer + sTmpBuffer.appendUtf32( nNextCh ); + } + break; + } + + if( !bContinue && !sTmpBuffer.isEmpty() ) + { + aToken.append( sTmpBuffer ); + sTmpBuffer.setLength(0); + } + + if( bContinue && bNextCh ) + nNextCh = GetNextChar(); + } + + if( IsParserWorking() ) + SaveState( HtmlTokenId::NONE ); + else + nToken = HtmlTokenId::NONE; + + return nToken; +} + +// Scan next token +HtmlTokenId HTMLParser::GetNextToken_() +{ + HtmlTokenId nRet = HtmlTokenId::NONE; + sSaveToken.clear(); + + if (mnPendingOffToken != HtmlTokenId::NONE) + { + // HtmlTokenId::<TOKEN>_OFF generated for HtmlTokenId::<TOKEN>_ON + 
nRet = mnPendingOffToken; + mnPendingOffToken = HtmlTokenId::NONE; + aToken.setLength( 0 ); + return nRet; + } + + // Delete options + maOptions.clear(); + + if( !IsParserWorking() ) // Don't continue if already an error occurred + return HtmlTokenId::NONE; + + bool bReadNextCharSave = bReadNextChar; + if( bReadNextChar ) + { + DBG_ASSERT( !bEndTokenFound, + "Read a character despite </SCRIPT> was read?" ); + nNextCh = GetNextChar(); + if( !IsParserWorking() ) // Don't continue if already an error occurred + return HtmlTokenId::NONE; + bReadNextChar = false; + } + + if( bReadScript || bReadStyle || !aEndToken.isEmpty() ) + { + nRet = GetNextRawToken(); + if( nRet != HtmlTokenId::NONE || !IsParserWorking() ) + return nRet; + } + + do { + bool bNextCh = true; + switch( nNextCh ) + { + case '<': + { + sal_uInt64 nStreamPos = rInput.Tell(); + sal_uInt32 nLineNr = GetLineNr(); + sal_uInt32 nLinePos = GetLinePos(); + + bool bOffState = false; + if( '/' == (nNextCh = GetNextChar()) ) + { + bOffState = true; + nNextCh = GetNextChar(); + } + // Assume '<?' is a start of an XML declaration, ignore it. + if (rtl::isAsciiAlpha(nNextCh) || nNextCh == '!' 
|| nNextCh == '?') + { + OUStringBuffer sTmpBuffer; + do { + sTmpBuffer.appendUtf32( nNextCh ); + nNextCh = GetNextChar(); + if (std::u16string_view(sTmpBuffer) == u"![CDATA[") + break; + if (bFuzzing && sTmpBuffer.getLength() > 1024) + { + SAL_WARN("svtools", "abandoning import for performance reasons with long tokens"); + eState = SvParserState::Error; + break; + } + } while( '>' != nNextCh && '/' != nNextCh && !rtl::isAsciiWhiteSpace( nNextCh ) && + !linguistic::IsControlChar(nNextCh) && + IsParserWorking() && !rInput.eof() ); + + if( !sTmpBuffer.isEmpty() ) + { + aToken.append( sTmpBuffer ); + sTmpBuffer.setLength(0); + } + + // Skip blanks + while( rtl::isAsciiWhiteSpace( nNextCh ) && IsParserWorking() ) + nNextCh = GetNextChar(); + + if( !IsParserWorking() ) + { + if( SvParserState::Pending == eState ) + bReadNextChar = bReadNextCharSave; + break; + } + + // Search token in table: + sSaveToken = aToken; + aToken = aToken.toString().toAsciiLowerCase(); + + if (!maNamespace.isEmpty() && o3tl::starts_with(aToken, maNamespace)) + aToken.remove( 0, maNamespace.getLength()); + + if( HtmlTokenId::NONE == (nRet = GetHTMLToken( aToken )) ) + // Unknown control + nRet = HtmlTokenId::UNKNOWNCONTROL_ON; + + // If it's a token which can be switched off... + if( bOffState ) + { + if( nRet >= HtmlTokenId::ONOFF_START ) + { + // and there is an off token, return off token instead + nRet = static_cast<HtmlTokenId>(static_cast<int>(nRet) + 1); + } + else if( HtmlTokenId::LINEBREAK!=nRet || !maNamespace.isEmpty()) + { + // and there is no off token, return unknown token. + // (except for </BR>, that is treated like <BR>) + // No exception for XHTML, though. + nRet = HtmlTokenId::UNKNOWNCONTROL_OFF; + } + } + + if( nRet == HtmlTokenId::COMMENT ) + { + // fix: due to being case sensitive use sSaveToken as start of comment + // and append a blank. 
+ aToken = sSaveToken; + if( '>'!=nNextCh ) + aToken.append( " " ); + sal_uInt64 nCStreamPos = 0; + sal_uInt32 nCLineNr = 0; + sal_uInt32 nCLinePos = 0; + sal_Int32 nCStrLen = 0; + + bool bDone = false; + // Read until closing -->. If not found restart at first > + sTmpBuffer = aToken; + while( !bDone && !rInput.eof() && IsParserWorking() ) + { + if( '>'==nNextCh ) + { + if( !nCStreamPos ) + { + nCStreamPos = rInput.Tell(); + nCStrLen = sTmpBuffer.getLength(); + nCLineNr = GetLineNr(); + nCLinePos = GetLinePos(); + } + bDone = sTmpBuffer.getLength() >= 2 && sTmpBuffer[sTmpBuffer.getLength() - 2] == '-' && sTmpBuffer[sTmpBuffer.getLength() - 1] == '-'; + if( !bDone ) + sTmpBuffer.appendUtf32(nNextCh); + } + else if (!linguistic::IsControlChar(nNextCh) + || nNextCh == '\r' || nNextCh == '\n' || nNextCh == '\t') + { + sTmpBuffer.appendUtf32(nNextCh); + } + if( !bDone ) + nNextCh = GetNextChar(); + } + aToken = sTmpBuffer; + sTmpBuffer.setLength(0); + if( !bDone && IsParserWorking() && nCStreamPos ) + { + rInput.Seek( nCStreamPos ); + SetLineNr( nCLineNr ); + SetLinePos( nCLinePos ); + ClearTxtConvContext(); + aToken.truncate(nCStrLen); + nNextCh = '>'; + } + } + else if (nRet == HtmlTokenId::CDATA) + { + // Read until the closing ]]>. + bool bDone = false; + while (!bDone && !rInput.eof() && IsParserWorking()) + { + if (nNextCh == '>') + { + if (sTmpBuffer.getLength() >= 2) + { + bDone = sTmpBuffer[sTmpBuffer.getLength() - 2] == ']' + && sTmpBuffer[sTmpBuffer.getLength() - 1] == ']'; + if (bDone) + { + // Ignore ]] at the end. 
+ sTmpBuffer.setLength(sTmpBuffer.getLength() - 2); + } + } + if (!bDone) + { + sTmpBuffer.appendUtf32(nNextCh); + } + } + else if (!linguistic::IsControlChar(nNextCh)) + { + sTmpBuffer.appendUtf32(nNextCh); + } + if (!bDone) + { + nNextCh = GetNextChar(); + } + } + aToken = sTmpBuffer; + sTmpBuffer.setLength(0); + } + else + { + // TokenString not needed anymore + aToken.setLength( 0 ); + } + + // Read until closing '>' + if( '>' != nNextCh && IsParserWorking() ) + { + ScanText( '>' ); + + // fdo#34666 fdo#36080 fdo#36390: closing "/>"?: + // generate pending HtmlTokenId::<TOKEN>_OFF for HtmlTokenId::<TOKEN>_ON + // Do not convert this to a single HtmlTokenId::<TOKEN>_OFF + // which lead to fdo#56772. + if ((nRet >= HtmlTokenId::ONOFF_START) && o3tl::ends_with(aToken, u"/")) + { + mnPendingOffToken = static_cast<HtmlTokenId>(static_cast<int>(nRet) + 1); // HtmlTokenId::<TOKEN>_ON -> HtmlTokenId::<TOKEN>_OFF + aToken.setLength( aToken.getLength()-1 ); // remove trailing '/' + } + if( sal_Unicode(EOF) == nNextCh && rInput.eof() ) + { + // Move back in front of < and restart there. + // Return < as text. + rInput.Seek( nStreamPos ); + SetLineNr( nLineNr ); + SetLinePos( nLinePos ); + ClearTxtConvContext(); + + aToken = "<"; + nRet = HtmlTokenId::TEXTTOKEN; + nNextCh = GetNextChar(); + bNextCh = false; + break; + } + } + if( SvParserState::Pending == eState ) + bReadNextChar = bReadNextCharSave; + } + else + { + if( bOffState ) + { + // simply throw away everything + ScanText( '>' ); + if( sal_Unicode(EOF) == nNextCh && rInput.eof() ) + { + // Move back in front of < and restart there. + // Return < as text. 
+ rInput.Seek( nStreamPos ); + SetLineNr( nLineNr ); + SetLinePos( nLinePos ); + ClearTxtConvContext(); + + aToken = "<"; + nRet = HtmlTokenId::TEXTTOKEN; + nNextCh = GetNextChar(); + bNextCh = false; + break; + } + if( SvParserState::Pending == eState ) + bReadNextChar = bReadNextCharSave; + aToken.setLength( 0 ); + } + else if( '%' == nNextCh ) + { + nRet = HtmlTokenId::UNKNOWNCONTROL_ON; + + sal_uInt64 nCStreamPos = rInput.Tell(); + sal_uInt32 nCLineNr = GetLineNr(), nCLinePos = GetLinePos(); + + bool bDone = false; + // Read until closing %>. If not found restart at first >. + sal_Unicode nLastTokenChar = !aToken.isEmpty() ? aToken[aToken.getLength() - 1] : 0; + OUStringBuffer aTmpBuffer(aToken); + while( !bDone && !rInput.eof() && IsParserWorking() ) + { + bDone = '>'==nNextCh && nLastTokenChar == '%'; + if( !bDone ) + { + aTmpBuffer.appendUtf32(nNextCh); + nLastTokenChar = aTmpBuffer[aTmpBuffer.getLength() - 1]; + nNextCh = GetNextChar(); + } + } + if( !bDone && IsParserWorking() ) + { + rInput.Seek( nCStreamPos ); + SetLineNr( nCLineNr ); + SetLinePos( nCLinePos ); + ClearTxtConvContext(); + aToken = "<%"; + nRet = HtmlTokenId::TEXTTOKEN; + break; + } + aToken = aTmpBuffer; + aTmpBuffer.setLength(0); + if( IsParserWorking() ) + { + sSaveToken = aToken; + aToken.setLength( 0 ); + } + } + else + { + aToken = "<"; + nRet = HtmlTokenId::TEXTTOKEN; + bNextCh = false; + break; + } + } + + if( IsParserWorking() ) + { + bNextCh = '>' == nNextCh; + switch( nRet ) + { + case HtmlTokenId::TEXTAREA_ON: + bReadTextArea = true; + break; + case HtmlTokenId::TEXTAREA_OFF: + bReadTextArea = false; + break; + case HtmlTokenId::SCRIPT_ON: + if( !bReadTextArea ) + bReadScript = true; + break; + case HtmlTokenId::SCRIPT_OFF: + if( !bReadTextArea ) + { + bReadScript = false; + // JavaScript might modify the stream, + // thus the last character has to be read again. 
+ bReadNextChar = true; + bNextCh = false; + } + break; + + case HtmlTokenId::STYLE_ON: + bReadStyle = true; + break; + case HtmlTokenId::STYLE_OFF: + bReadStyle = false; + break; + default: break; + } + } + } + break; + + case sal_Unicode(EOF): + if( rInput.eof() ) + { + eState = SvParserState::Accepted; + nRet = HtmlTokenId(nNextCh); + } + else + { + // Read normal text. + goto scan_text; + } + break; + + case '\f': + // form feeds are passed upwards separately + nRet = HtmlTokenId::LINEFEEDCHAR; // !!! should be FORMFEEDCHAR + break; + + case '\n': + case '\r': + if( bReadListing || bReadXMP || bReadPRE || bReadTextArea ) + { + sal_Unicode c = GetNextChar(); + if( ( '\n' != nNextCh || '\r' != c ) && + ( '\r' != nNextCh || '\n' != c ) ) + { + bNextCh = false; + nNextCh = c; + } + nRet = HtmlTokenId::NEWPARA; + break; + } + [[fallthrough]]; + case '\t': + if( bReadPRE ) + { + nRet = HtmlTokenId::TABCHAR; + break; + } + [[fallthrough]]; + case ' ': + [[fallthrough]]; + default: + +scan_text: + // "normal" text to come + nRet = ScanText(); + bNextCh = 0 == aToken.getLength(); + + // the text should be processed + if( !bNextCh && eState == SvParserState::Pending ) + { + eState = SvParserState::Working; + bReadNextChar = true; + } + + break; + } + + if( bNextCh && SvParserState::Working == eState ) + { + nNextCh = GetNextChar(); + if( SvParserState::Pending == eState && nRet != HtmlTokenId::NONE && HtmlTokenId::TEXTTOKEN != nRet ) + { + bReadNextChar = true; + eState = SvParserState::Working; + } + } + + } while( nRet == HtmlTokenId::NONE && SvParserState::Working == eState ); + + if( SvParserState::Pending == eState ) + nRet = HtmlTokenId::INVALID; // s.th. 
invalid + + return nRet; +} + +void HTMLParser::UnescapeToken() +{ + sal_Int32 nPos=0; + + bool bEscape = false; + while( nPos < aToken.getLength() ) + { + bool bOldEscape = bEscape; + bEscape = false; + if( '\\'==aToken[nPos] && !bOldEscape ) + { + aToken.remove( nPos, 1 ); + bEscape = true; + } + else + { + nPos++; + } + } +} + +const HTMLOptions& HTMLParser::GetOptions( HtmlOptionId const *pNoConvertToken ) +{ + // If the options for the current token have already been returned, + // return them once again. + if (!maOptions.empty()) + return maOptions; + + sal_Int32 nPos = 0; + while( nPos < aToken.getLength() ) + { + // A letter? Option beginning here. + if( rtl::isAsciiAlpha( aToken[nPos] ) ) + { + HtmlOptionId nToken; + OUString aValue; + sal_Int32 nStt = nPos; + sal_Unicode cChar = 0; + + // Actually only certain characters allowed. + // Netscape only looks for "=" and white space (c.f. + // Mozilla: PA_FetchRequestedNameValues in libparse/pa_mdl.c) + while( nPos < aToken.getLength() ) + { + cChar = aToken[nPos]; + if ( '=' == cChar ||!HTML_ISPRINTABLE(cChar) || rtl::isAsciiWhiteSpace(cChar) ) + break; + nPos++; + } + + OUString sName( aToken.subView( nStt, nPos-nStt ) ); + + // PlugIns require original token name. Convert to lower case only for searching. + nToken = GetHTMLOption( sName.toAsciiLowerCase() ); // Name is ready + SAL_WARN_IF( nToken==HtmlOptionId::UNKNOWN, "svtools", + "GetOption: unknown HTML option '" << sName << "'" ); + bool bStripCRLF = (nToken < HtmlOptionId::SCRIPT_START || + nToken >= HtmlOptionId::SCRIPT_END) && + (!pNoConvertToken || nToken != *pNoConvertToken); + + while( nPos < aToken.getLength() ) + { + cChar = aToken[nPos]; + if ( HTML_ISPRINTABLE(cChar) && !rtl::isAsciiWhiteSpace(cChar) ) + break; + nPos++; + } + + // Option with value? 
+ if( nPos!=aToken.getLength() && '='==cChar ) + { + nPos++; + + while( nPos < aToken.getLength() ) + { + cChar = aToken[nPos]; + if ( HTML_ISPRINTABLE(cChar) && ' ' != cChar && '\t' != cChar && '\r' != cChar && '\n' != cChar ) + break; + nPos++; + } + + if( nPos != aToken.getLength() ) + { + sal_Int32 nLen = 0; + nStt = nPos; + if( ('"'==cChar) || '\''==cChar ) + { + sal_Unicode cEnd = cChar; + nPos++; nStt++; + bool bDone = false; + bool bEscape = false; + while( nPos < aToken.getLength() && !bDone ) + { + bool bOldEscape = bEscape; + bEscape = false; + cChar = aToken[nPos]; + switch( cChar ) + { + case '\r': + case '\n': + if( bStripCRLF ) + aToken.remove( nPos, 1 ); + else + { + nPos++; + nLen++; + } + break; + case '\\': + if( bOldEscape ) + { + nPos++; + nLen++; + } + else + { + aToken.remove( nPos, 1 ); + bEscape = true; + } + break; + case '"': + case '\'': + bDone = !bOldEscape && cChar==cEnd; + if( !bDone ) + { + nPos++; + nLen++; + } + break; + default: + nPos++; + nLen++; + break; + } + } + if( nPos!=aToken.getLength() ) + nPos++; + } + else + { + // More liberal than the standard: allow all printable characters + bool bEscape = false; + bool bDone = false; + while( nPos < aToken.getLength() && !bDone ) + { + bool bOldEscape = bEscape; + bEscape = false; + sal_Unicode c = aToken[nPos]; + switch( c ) + { + case ' ': + bDone = !bOldEscape; + if( !bDone ) + { + nPos++; + nLen++; + } + break; + + case '\t': + case '\r': + case '\n': + bDone = true; + break; + + case '\\': + if( bOldEscape ) + { + nPos++; + nLen++; + } + else + { + aToken.remove( nPos, 1 ); + bEscape = true; + } + break; + + default: + if( HTML_ISPRINTABLE( c ) ) + { + nPos++; + nLen++; + } + else + bDone = true; + break; + } + } + } + + if( nLen ) + aValue = aToken.subView( nStt, nLen ); + } + } + + // Token is known and can be saved + maOptions.emplace_back(nToken, sName, aValue); + + } + else + // Ignore white space and unexpected characters + nPos++; + } + + return maOptions; +} + 
+HtmlTokenId HTMLParser::FilterPRE( HtmlTokenId nToken ) +{ + switch( nToken ) + { + // in Netscape they only have impact in not empty paragraphs + case HtmlTokenId::PARABREAK_ON: + nToken = HtmlTokenId::LINEBREAK; + [[fallthrough]]; + case HtmlTokenId::LINEBREAK: + case HtmlTokenId::NEWPARA: + nPre_LinePos = 0; + if( bPre_IgnoreNewPara ) + nToken = HtmlTokenId::NONE; + break; + + case HtmlTokenId::TABCHAR: + { + sal_Int32 nSpaces = 8 - (nPre_LinePos % 8); + DBG_ASSERT( aToken.isEmpty(), "Why is the token not empty?" ); + if (aToken.getLength() < nSpaces) + { + using comphelper::string::padToLength; + OUStringBuffer aBuf(aToken); + aToken = padToLength(aBuf, nSpaces, ' '); + } + nPre_LinePos += nSpaces; + nToken = HtmlTokenId::TEXTTOKEN; + } + break; + // Keep those + case HtmlTokenId::TEXTTOKEN: + nPre_LinePos += aToken.getLength(); + break; + + case HtmlTokenId::SELECT_ON: + case HtmlTokenId::SELECT_OFF: + case HtmlTokenId::BODY_ON: + case HtmlTokenId::FORM_ON: + case HtmlTokenId::FORM_OFF: + case HtmlTokenId::INPUT: + case HtmlTokenId::OPTION: + case HtmlTokenId::TEXTAREA_ON: + case HtmlTokenId::TEXTAREA_OFF: + + case HtmlTokenId::IMAGE: + case HtmlTokenId::APPLET_ON: + case HtmlTokenId::APPLET_OFF: + case HtmlTokenId::PARAM: + case HtmlTokenId::EMBED: + + case HtmlTokenId::HEAD1_ON: + case HtmlTokenId::HEAD1_OFF: + case HtmlTokenId::HEAD2_ON: + case HtmlTokenId::HEAD2_OFF: + case HtmlTokenId::HEAD3_ON: + case HtmlTokenId::HEAD3_OFF: + case HtmlTokenId::HEAD4_ON: + case HtmlTokenId::HEAD4_OFF: + case HtmlTokenId::HEAD5_ON: + case HtmlTokenId::HEAD5_OFF: + case HtmlTokenId::HEAD6_ON: + case HtmlTokenId::HEAD6_OFF: + case HtmlTokenId::BLOCKQUOTE_ON: + case HtmlTokenId::BLOCKQUOTE_OFF: + case HtmlTokenId::ADDRESS_ON: + case HtmlTokenId::ADDRESS_OFF: + case HtmlTokenId::HORZRULE: + + case HtmlTokenId::CENTER_ON: + case HtmlTokenId::CENTER_OFF: + case HtmlTokenId::DIVISION_ON: + case HtmlTokenId::DIVISION_OFF: + + case HtmlTokenId::SCRIPT_ON: + case 
HtmlTokenId::SCRIPT_OFF: + case HtmlTokenId::RAWDATA: + + case HtmlTokenId::TABLE_ON: + case HtmlTokenId::TABLE_OFF: + case HtmlTokenId::CAPTION_ON: + case HtmlTokenId::CAPTION_OFF: + case HtmlTokenId::COLGROUP_ON: + case HtmlTokenId::COLGROUP_OFF: + case HtmlTokenId::COL_ON: + case HtmlTokenId::COL_OFF: + case HtmlTokenId::THEAD_ON: + case HtmlTokenId::THEAD_OFF: + case HtmlTokenId::TFOOT_ON: + case HtmlTokenId::TFOOT_OFF: + case HtmlTokenId::TBODY_ON: + case HtmlTokenId::TBODY_OFF: + case HtmlTokenId::TABLEROW_ON: + case HtmlTokenId::TABLEROW_OFF: + case HtmlTokenId::TABLEDATA_ON: + case HtmlTokenId::TABLEDATA_OFF: + case HtmlTokenId::TABLEHEADER_ON: + case HtmlTokenId::TABLEHEADER_OFF: + + case HtmlTokenId::ANCHOR_ON: + case HtmlTokenId::ANCHOR_OFF: + case HtmlTokenId::BOLD_ON: + case HtmlTokenId::BOLD_OFF: + case HtmlTokenId::ITALIC_ON: + case HtmlTokenId::ITALIC_OFF: + case HtmlTokenId::STRIKE_ON: + case HtmlTokenId::STRIKE_OFF: + case HtmlTokenId::STRIKETHROUGH_ON: + case HtmlTokenId::STRIKETHROUGH_OFF: + case HtmlTokenId::UNDERLINE_ON: + case HtmlTokenId::UNDERLINE_OFF: + case HtmlTokenId::BASEFONT_ON: + case HtmlTokenId::BASEFONT_OFF: + case HtmlTokenId::FONT_ON: + case HtmlTokenId::FONT_OFF: + case HtmlTokenId::BLINK_ON: + case HtmlTokenId::BLINK_OFF: + case HtmlTokenId::SPAN_ON: + case HtmlTokenId::SPAN_OFF: + case HtmlTokenId::SUBSCRIPT_ON: + case HtmlTokenId::SUBSCRIPT_OFF: + case HtmlTokenId::SUPERSCRIPT_ON: + case HtmlTokenId::SUPERSCRIPT_OFF: + case HtmlTokenId::BIGPRINT_ON: + case HtmlTokenId::BIGPRINT_OFF: + case HtmlTokenId::SMALLPRINT_OFF: + case HtmlTokenId::SMALLPRINT_ON: + + case HtmlTokenId::EMPHASIS_ON: + case HtmlTokenId::EMPHASIS_OFF: + case HtmlTokenId::CITATION_ON: + case HtmlTokenId::CITATION_OFF: + case HtmlTokenId::STRONG_ON: + case HtmlTokenId::STRONG_OFF: + case HtmlTokenId::CODE_ON: + case HtmlTokenId::CODE_OFF: + case HtmlTokenId::SAMPLE_ON: + case HtmlTokenId::SAMPLE_OFF: + case HtmlTokenId::KEYBOARD_ON: + case 
HtmlTokenId::KEYBOARD_OFF: + case HtmlTokenId::VARIABLE_ON: + case HtmlTokenId::VARIABLE_OFF: + case HtmlTokenId::DEFINSTANCE_ON: + case HtmlTokenId::DEFINSTANCE_OFF: + case HtmlTokenId::SHORTQUOTE_ON: + case HtmlTokenId::SHORTQUOTE_OFF: + case HtmlTokenId::LANGUAGE_ON: + case HtmlTokenId::LANGUAGE_OFF: + case HtmlTokenId::AUTHOR_ON: + case HtmlTokenId::AUTHOR_OFF: + case HtmlTokenId::PERSON_ON: + case HtmlTokenId::PERSON_OFF: + case HtmlTokenId::ACRONYM_ON: + case HtmlTokenId::ACRONYM_OFF: + case HtmlTokenId::ABBREVIATION_ON: + case HtmlTokenId::ABBREVIATION_OFF: + case HtmlTokenId::INSERTEDTEXT_ON: + case HtmlTokenId::INSERTEDTEXT_OFF: + case HtmlTokenId::DELETEDTEXT_ON: + case HtmlTokenId::DELETEDTEXT_OFF: + case HtmlTokenId::TELETYPE_ON: + case HtmlTokenId::TELETYPE_OFF: + + break; + + // The remainder is treated as an unknown token. + default: + if( nToken != HtmlTokenId::NONE ) + { + nToken = + ( ((nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken)) + ? HtmlTokenId::UNKNOWNCONTROL_OFF + : HtmlTokenId::UNKNOWNCONTROL_ON ); + } + break; + } + + bPre_IgnoreNewPara = false; + + return nToken; +} + +HtmlTokenId HTMLParser::FilterXMP( HtmlTokenId nToken ) +{ + switch( nToken ) + { + case HtmlTokenId::NEWPARA: + if( bPre_IgnoreNewPara ) + nToken = HtmlTokenId::NONE; + [[fallthrough]]; + case HtmlTokenId::TEXTTOKEN: + case HtmlTokenId::NONBREAKSPACE: + case HtmlTokenId::SOFTHYPH: + break; // kept + + default: + if( nToken != HtmlTokenId::NONE ) + { + if( (nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken) ) + { + sSaveToken = "</" + sSaveToken; + } + else + sSaveToken = "<" + sSaveToken; + if( !aToken.isEmpty() ) + { + UnescapeToken(); + sSaveToken += " "; + aToken.insert(0, sSaveToken); + } + else + aToken = sSaveToken; + aToken.append( ">" ); + nToken = HtmlTokenId::TEXTTOKEN; + } + break; + } + + bPre_IgnoreNewPara = false; + + return nToken; +} + +HtmlTokenId HTMLParser::FilterListing( HtmlTokenId nToken ) +{ + switch( nToken ) + { + case 
HtmlTokenId::NEWPARA: + if( bPre_IgnoreNewPara ) + nToken = HtmlTokenId::NONE; + [[fallthrough]]; + case HtmlTokenId::TEXTTOKEN: + case HtmlTokenId::NONBREAKSPACE: + case HtmlTokenId::SOFTHYPH: + break; // kept + + default: + if( nToken != HtmlTokenId::NONE ) + { + nToken = + ( ((nToken >= HtmlTokenId::ONOFF_START) && isOffToken(nToken)) + ? HtmlTokenId::UNKNOWNCONTROL_OFF + : HtmlTokenId::UNKNOWNCONTROL_ON ); + } + break; + } + + bPre_IgnoreNewPara = false; + + return nToken; +} + +bool HTMLParser::InternalImgToPrivateURL( OUString& rURL ) +{ + bool bFound = false; + + if( rURL.startsWith( OOO_STRING_SVTOOLS_HTML_internal_icon ) ) + { + OUString aName( rURL.copy(14) ); + switch( aName[0] ) + { + case 'b': + bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_baddata; + break; + case 'd': + bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_delayed; + break; + case 'e': + bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_embed; + break; + case 'i': + bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_insecure; + break; + case 'n': + bFound = aName == OOO_STRING_SVTOOLS_HTML_INT_ICON_notfound; + break; + } + } + if( bFound ) + { + OUString sTmp ( rURL ); + rURL = OOO_STRING_SVTOOLS_HTML_private_image; + rURL += sTmp; + } + + return bFound; +} + +namespace { + +enum class HtmlMeta { + NONE = 0, + Author, + Description, + Keywords, + Refresh, + Classification, + Created, + ChangedBy, + Changed, + Generator, + SDFootnote, + SDEndnote, + ContentType +}; + +} + +// <META NAME=xxx> +HTMLOptionEnum<HtmlMeta> const aHTMLMetaNameTable[] = +{ + { OOO_STRING_SVTOOLS_HTML_META_author, HtmlMeta::Author }, + { OOO_STRING_SVTOOLS_HTML_META_changed, HtmlMeta::Changed }, + { OOO_STRING_SVTOOLS_HTML_META_changedby, HtmlMeta::ChangedBy }, + { OOO_STRING_SVTOOLS_HTML_META_classification,HtmlMeta::Classification}, + { OOO_STRING_SVTOOLS_HTML_META_content_type, HtmlMeta::ContentType }, + { OOO_STRING_SVTOOLS_HTML_META_created, HtmlMeta::Created }, + { 
OOO_STRING_SVTOOLS_HTML_META_description, HtmlMeta::Description }, + { OOO_STRING_SVTOOLS_HTML_META_keywords, HtmlMeta::Keywords }, + { OOO_STRING_SVTOOLS_HTML_META_generator, HtmlMeta::Generator }, + { OOO_STRING_SVTOOLS_HTML_META_refresh, HtmlMeta::Refresh }, + { OOO_STRING_SVTOOLS_HTML_META_sdendnote, HtmlMeta::SDEndnote }, + { OOO_STRING_SVTOOLS_HTML_META_sdfootnote, HtmlMeta::SDFootnote }, + { nullptr, HtmlMeta(0) } +}; + + +void HTMLParser::AddMetaUserDefined( OUString const & ) +{ +} + +bool HTMLParser::ParseMetaOptionsImpl( + const uno::Reference<document::XDocumentProperties> & i_xDocProps, + SvKeyValueIterator *i_pHTTPHeader, + const HTMLOptions& aOptions, + rtl_TextEncoding& o_rEnc ) +{ + OUString aName, aContent; + HtmlMeta nAction = HtmlMeta::NONE; + bool bHTTPEquiv = false, bChanged = false; + + for ( size_t i = aOptions.size(); i; ) + { + const HTMLOption& aOption = aOptions[--i]; + switch ( aOption.GetToken() ) + { + case HtmlOptionId::NAME: + aName = aOption.GetString(); + if ( HtmlMeta::NONE==nAction ) + { + aOption.GetEnum( nAction, aHTMLMetaNameTable ); + } + break; + case HtmlOptionId::HTTPEQUIV: + aName = aOption.GetString(); + aOption.GetEnum( nAction, aHTMLMetaNameTable ); + bHTTPEquiv = true; + break; + case HtmlOptionId::CONTENT: + aContent = aOption.GetString(); + break; + case HtmlOptionId::CHARSET: + { + OString sValue(OUStringToOString(aOption.GetString(), RTL_TEXTENCODING_ASCII_US)); + o_rEnc = GetExtendedCompatibilityTextEncoding(rtl_getTextEncodingFromMimeCharset(sValue.getStr())); + break; + } + default: break; + } + } + + if ( bHTTPEquiv || HtmlMeta::Description != nAction ) + { + // if it is not a Description, remove CRs and LFs from CONTENT + aContent = aContent.replaceAll("\r", "").replaceAll("\n", ""); + } + else + { + // convert line endings for Description + aContent = convertLineEnd(aContent, GetSystemLineEnd()); + } + + if ( bHTTPEquiv && i_pHTTPHeader ) + { + // Netscape seems to just ignore a closing ", so we do too + if 
( aContent.endsWith("\"") ) + { + aContent = aContent.copy( 0, aContent.getLength() - 1 ); + } + SvKeyValue aKeyValue( aName, aContent ); + i_pHTTPHeader->Append( aKeyValue ); + } + + switch ( nAction ) + { + case HtmlMeta::Author: + if (i_xDocProps.is()) { + i_xDocProps->setAuthor( aContent ); + bChanged = true; + } + break; + case HtmlMeta::Description: + if (i_xDocProps.is()) { + i_xDocProps->setDescription( aContent ); + bChanged = true; + } + break; + case HtmlMeta::Keywords: + if (i_xDocProps.is()) { + i_xDocProps->setKeywords( + ::comphelper::string::convertCommaSeparated(aContent)); + bChanged = true; + } + break; + case HtmlMeta::Classification: + if (i_xDocProps.is()) { + i_xDocProps->setSubject( aContent ); + bChanged = true; + } + break; + + case HtmlMeta::ChangedBy: + if (i_xDocProps.is()) { + i_xDocProps->setModifiedBy( aContent ); + bChanged = true; + } + break; + + case HtmlMeta::Created: + case HtmlMeta::Changed: + if (i_xDocProps.is() && !aContent.isEmpty()) + { + ::util::DateTime uDT; + bool valid = false; + if (comphelper::string::getTokenCount(aContent, ';') == 2) + { + sal_Int32 nIdx{ 0 }; + Date aDate(o3tl::toInt32(o3tl::getToken(aContent, 0, ';', nIdx))); + auto nTime = o3tl::toInt64(o3tl::getToken(aContent, 0, ';', nIdx)); + if (nTime < 0) + nTime = o3tl::saturating_toggle_sign(nTime); + tools::Time aTime(nTime); + DateTime aDateTime(aDate, aTime); + uDT = aDateTime.GetUNODateTime(); + valid = true; + } + else if (utl::ISO8601parseDateTime(aContent, uDT)) + valid = true; + + if (valid) + { + bChanged = true; + if (HtmlMeta::Created == nAction) + i_xDocProps->setCreationDate(uDT); + else + i_xDocProps->setModificationDate(uDT); + } + } + break; + + case HtmlMeta::Refresh: + DBG_ASSERT( !bHTTPEquiv || i_pHTTPHeader, "Lost Reload-URL because of omitted MUST change." 
); + break; + + case HtmlMeta::ContentType: + if ( !aContent.isEmpty() ) + { + o_rEnc = GetEncodingByMIME( aContent ); + } + break; + + case HtmlMeta::NONE: + if ( !bHTTPEquiv ) + { + if (i_xDocProps.is()) + { + uno::Reference<beans::XPropertyContainer> xUDProps + = i_xDocProps->getUserDefinedProperties(); + try { + xUDProps->addProperty(aName, + beans::PropertyAttribute::REMOVABLE, + uno::Any(aContent)); + AddMetaUserDefined(aName); + bChanged = true; + } catch (uno::Exception &) { + // ignore + } + } + } + break; + default: + break; + } + + return bChanged; +} + +bool HTMLParser::ParseMetaOptions( + const uno::Reference<document::XDocumentProperties> & i_xDocProps, + SvKeyValueIterator *i_pHeader ) +{ + HtmlOptionId nContentOption = HtmlOptionId::CONTENT; + rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW; + + bool bRet = ParseMetaOptionsImpl( i_xDocProps, i_pHeader, + GetOptions(&nContentOption), + eEnc ); + + // If the encoding is set by a META tag, it may only overwrite the + // current encoding if both, the current and the new encoding, are 1-sal_uInt8 + // encodings. Everything else cannot lead to reasonable results. 
+ if (RTL_TEXTENCODING_DONTKNOW != eEnc && + rtl_isOctetTextEncoding( eEnc ) && + rtl_isOctetTextEncoding( GetSrcEncoding() ) ) + { + eEnc = GetExtendedCompatibilityTextEncoding( eEnc ); + SetSrcEncoding( eEnc ); + } + + return bRet; +} + +rtl_TextEncoding HTMLParser::GetEncodingByMIME( const OUString& rMime ) +{ + OUString sType; + OUString sSubType; + INetContentTypeParameterList aParameters; + if (INetContentTypes::parse(rMime, sType, sSubType, &aParameters)) + { + auto const iter = aParameters.find("charset"_ostr); + if (iter != aParameters.end()) + { + const INetContentTypeParameter * pCharset = &iter->second; + OString sValue(OUStringToOString(pCharset->m_sValue, RTL_TEXTENCODING_ASCII_US)); + return GetExtendedCompatibilityTextEncoding( rtl_getTextEncodingFromMimeCharset( sValue.getStr() ) ); + } + } + return RTL_TEXTENCODING_DONTKNOW; +} + +rtl_TextEncoding HTMLParser::GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader ) +{ + rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW; + if( pHTTPHeader ) + { + SvKeyValue aKV; + for( bool bCont = pHTTPHeader->GetFirst( aKV ); bCont; + bCont = pHTTPHeader->GetNext( aKV ) ) + { + if( aKV.GetKey().equalsIgnoreAsciiCase( OOO_STRING_SVTOOLS_HTML_META_content_type ) ) + { + if( !aKV.GetValue().isEmpty() ) + { + eRet = HTMLParser::GetEncodingByMIME( aKV.GetValue() ); + } + } + } + } + return eRet; +} + +bool HTMLParser::SetEncodingByHTTPHeader( SvKeyValueIterator *pHTTPHeader ) +{ + bool bRet = false; + rtl_TextEncoding eEnc = HTMLParser::GetEncodingByHttpHeader( pHTTPHeader ); + if(RTL_TEXTENCODING_DONTKNOW != eEnc) + { + SetSrcEncoding( eEnc ); + bRet = true; + } + return bRet; +} + + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |