diff options
Diffstat (limited to '')
-rw-r--r-- | include/svtools/htmltokn.h | 480 |
1 files changed, 480 insertions, 0 deletions
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h new file mode 100644 index 0000000000..4a333ee2f6 --- /dev/null +++ b/include/svtools/htmltokn.h @@ -0,0 +1,480 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#pragma once + +#include <svtools/svtdllapi.h> +#include <sal/types.h> +#include <stdexcept> +#include <string_view> + +namespace rtl { + class OUString; +}; +enum class HtmlOptionId; +enum class HtmlTokenId : sal_Int16; + +// search the char for the CharName +sal_Unicode GetHTMLCharName( std::u16string_view rName ); + +// search the TokenID for the token +SVT_DLLPUBLIC HtmlTokenId GetHTMLToken( std::u16string_view rName ); + +// search the TokenId for an attribute token +HtmlOptionId GetHTMLOption( std::u16string_view rName ); + +// search the 24-bit color for a color name (not found = SAL_MAX_UINT32) +SVT_DLLPUBLIC sal_uInt32 GetHTMLColor( const rtl::OUString& rName ); + +enum class HtmlTokenId : sal_Int16 +{ + INVALID = -1, + NONE = 0, +// always starting from 256 on, greater than a char + TEXTTOKEN = 0x100, + SINGLECHAR, + NEWPARA, + TABCHAR, + RAWDATA, + LINEFEEDCHAR, + + // these will only be turned on + AREA, // Netscape 2.0 + BASE, // HTML 3.0 + COMMENT, + CDATA, + DUMMY, // so ONOFF_START is even + DOCTYPE, + EMBED, // Netscape 2.0 ignore </EMBED> + HORZRULE, // ignore </HR> + IMAGE, // ignore </IMG> + INPUT, // ignore </INPUT> + LINEBREAK, // </BR> -> <BR> + LINK, // HTML 3.0 + META, // HTML 3.0 ignore </META> + OPTION, // ignore </OPTION> + PARAM, // HotJava + SPACER, // Netscape 3.0b5 // ignore </SPACER> + + // tokens recognised using HTML character + NONBREAKSPACE, + SOFTHYPH, + + // these will be turned back off, + // the off value is always located behind (+1) !! + ONOFF_START, + ABBREVIATION_ON = ONOFF_START, // HTML 3.0 + ABBREVIATION_OFF, // HTML 3.0 + ACRONYM_ON, // HTML 3.0 + ACRONYM_OFF, // HTML 3.0 + ADDRESS_ON, + ADDRESS_OFF, + ANCHOR_ON, + ANCHOR_OFF, + APPLET_ON, // HotJava + APPLET_OFF, // HotJava + AUTHOR_ON, // HTML 3.0 + AUTHOR_OFF, // HTML 3.0 + BANNER_ON, // HTML 3.0 + BANNER_OFF, // HTML 3.0 + BASEFONT_ON, // Netscape + BASEFONT_OFF, // Netscape + BIGPRINT_ON, // HTML 3.0 + BIGPRINT_OFF, // HTML 3.0 + BLINK_ON, // Netscape + BLINK_OFF, // Netscape + BLOCKQUOTE30_ON, // HTML 3.0 + BLOCKQUOTE30_OFF, // HTML 3.0 + BLOCKQUOTE_ON, + BLOCKQUOTE_OFF, + BODY_ON, + BODY_OFF, + BOLD_ON, + BOLD_OFF, + CAPTION_ON, // HTML 3.0 + CAPTION_OFF, // HTML 3.0 + CENTER_ON, // Netscape + CENTER_OFF, // Netscape + CITATION_ON, + CITATION_OFF, + CODE_ON, + CODE_OFF, + COL_ON, // HTML3 Table Model Draft + COL_OFF, // HTML3 Table Model Draft + COLGROUP_ON, // HTML3 Table Model Draft + COLGROUP_OFF, // HTML3 Table Model Draft + CREDIT_ON, // HTML 3.0 + CREDIT_OFF, // HTML 3.0 + DD_ON, + DD_OFF, + DEFLIST_ON, + DEFLIST_OFF, + DELETEDTEXT_ON, // HTML 3.0 + DELETEDTEXT_OFF, // HTML 3.0 + DIRLIST_ON, + DIRLIST_OFF, + DIVISION_ON, // HTML 3.0 + DIVISION_OFF, // HTML 3.0 + DT_ON, + DT_OFF, + EMPHASIS_ON, + EMPHASIS_OFF, + FIGURE_ON, // HTML 3.0 + FIGURE_OFF, // HTML 3.0 + FONT_ON, // Netscape + FONT_OFF, // Netscape + FOOTNOTE_ON, // HTML 3.0 + FOOTNOTE_OFF, // HTML 3.0 + FORM_ON, + FORM_OFF, + FRAME_ON, // Netscape 2.0 + FRAME_OFF, // Netscape 2.0 + FRAMESET_ON, // Netscape 2.0 + FRAMESET_OFF, // Netscape 2.0 + HEAD1_ON, + HEAD1_OFF, + HEAD2_ON, + HEAD2_OFF, + HEAD3_ON, + HEAD3_OFF, + HEAD4_ON, + HEAD4_OFF, + HEAD5_ON, + HEAD5_OFF, + HEAD6_ON, + HEAD6_OFF, + HEAD_ON, + HEAD_OFF, + HTML_ON, + HTML_OFF, + IFRAME_ON, // IE 3.0b2 + IFRAME_OFF, // IE 3.0b2 + INSERTEDTEXT_ON, // HTML 3.0 + INSERTEDTEXT_OFF, // HTML 3.0 + ITALIC_ON, + ITALIC_OFF, + KEYBOARD_ON, + KEYBOARD_OFF, + LANGUAGE_ON, // HTML 3.0 + LANGUAGE_OFF, // HTML 3.0 + LISTHEADER_ON, // HTML 3.0 + LISTHEADER_OFF, // HTML 3.0 + LI_ON, + LI_OFF, + MAP_ON, // Netscape 2.0 + MAP_OFF, // Netscape 2.0 + MENULIST_ON, + MENULIST_OFF, + MULTICOL_ON, // Netscape 3.0b5 + MULTICOL_OFF, // Netscape 3.0b5 + NOBR_ON, // Netscape + NOBR_OFF, // Netscape + NOEMBED_ON, // Netscape 2.0 + NOEMBED_OFF, // Netscape 2.0 + NOFRAMES_ON, // Netscape 2.0 + NOFRAMES_OFF, // Netscape 2.0 + NOSCRIPT_ON, // Netscape 2.0 + NOSCRIPT_OFF, // Netscape 3.0 + NOTE_ON, // HTML 3.0 + NOTE_OFF, // HTML 3.0 + OBJECT_ON, // HotJava + OBJECT_OFF, // HotJava + ORDERLIST_ON, + ORDERLIST_OFF, + PARABREAK_ON, + PARABREAK_OFF, + PERSON_ON, // HTML 3.0 + PERSON_OFF, // HTML 3.0 + PLAINTEXT_ON, // HTML 3.0 + PLAINTEXT_OFF, // HTML 3.0 + PREFORMTXT_ON, + PREFORMTXT_OFF, + SAMPLE_ON, + SAMPLE_OFF, + SCRIPT_ON, // HTML 3.2 + SCRIPT_OFF, // HTML 3.2 + SELECT_ON, + SELECT_OFF, + SHORTQUOTE_ON, // HTML 3.0 + SHORTQUOTE_OFF, // HTML 3.0 + SMALLPRINT_ON, // HTML 3.0 + SMALLPRINT_OFF, // HTML 3.0 + SPAN_ON, // Style Sheets + SPAN_OFF, // Style Sheets + STRIKETHROUGH_ON, // HTML 3.0 + STRIKETHROUGH_OFF, // HTML 3.0 + STRONG_ON, + STRONG_OFF, + STYLE_ON, // HTML 3.0 + STYLE_OFF, // HTML 3.0 + SUBSCRIPT_ON, // HTML 3.0 + SUBSCRIPT_OFF, // HTML 3.0 + SUPERSCRIPT_ON, // HTML 3.0 + SUPERSCRIPT_OFF, // HTML 3.0 + TABLE_ON, // HTML 3.0 + TABLE_OFF, // HTML 3.0 + TABLEDATA_ON, // HTML 3.0 + TABLEDATA_OFF, // HTML 3.0 + TABLEHEADER_ON, // HTML 3.0 + TABLEHEADER_OFF, // HTML 3.0 + TABLEROW_ON, // HTML 3.0 + TABLEROW_OFF, // HTML 3.0 + TBODY_ON, // HTML3 Table Model Draft + TBODY_OFF, // HTML3 Table Model Draft + TELETYPE_ON, + TELETYPE_OFF, + TEXTAREA_ON, + TEXTAREA_OFF, + TFOOT_ON, // HTML3 Table Model Draft + TFOOT_OFF, // HTML3 Table Model Draft + THEAD_ON, // HTML3 Table Model Draft + THEAD_OFF, // HTML3 Table Model Draft + TITLE_ON, + TITLE_OFF, + UNDERLINE_ON, + UNDERLINE_OFF, + UNORDERLIST_ON, + UNORDERLIST_OFF, + VARIABLE_ON, + VARIABLE_OFF, + + // obsolete features + XMP_ON, + XMP_OFF, + LISTING_ON, + LISTING_OFF, + + // proposed features + DEFINSTANCE_ON, + DEFINSTANCE_OFF, + STRIKE_ON, + STRIKE_OFF, + + UNKNOWNCONTROL_ON, + UNKNOWNCONTROL_OFF, + + // Microsoft features + COMMENT2_ON, // HTML 2.0 ? + COMMENT2_OFF, // HTML 2.0 ? + MARQUEE_ON, + MARQUEE_OFF, + PLAINTEXT2_ON, // HTML 2.0 ? + PLAINTEXT2_OFF, // HTML 2.0 ? + + SDFIELD_ON, + SDFIELD_OFF +}; + +constexpr bool isOffToken(HtmlTokenId nToken) +{ + return (nToken == HtmlTokenId::NONE || nToken >= HtmlTokenId::ONOFF_START) + ? (1 & static_cast<int>(nToken)) + : throw std::logic_error("Assertion failed!"); // C++11 does not do assert in constexpr +} + +constexpr HtmlTokenId getOnToken(HtmlTokenId nToken) +{ + return (nToken == HtmlTokenId::NONE || nToken >= HtmlTokenId::ONOFF_START) + ? HtmlTokenId(~1 & static_cast<int>(nToken)) + : throw std::logic_error("Assertion failed!"); // C++11 does not do assert in constexpr +} + +// HTML attribute token (=Options) + +enum class HtmlOptionId +{ +// always starting from 256 on, greater than a char +BOOL_START = 0x100, + +// attributes without value + CHECKED = BOOL_START, + COMPACT, + DECLARE, // IExplorer 3.0b5 + DISABLED, + ISMAP, + MAYSCRIPT, // Netscape 3.0 + MULTIPLE, + NOHREF, // Netscape + NORESIZE, // Netscape 2.0 + NOSHADE, // Netscape + NOWRAP, + SDFIXED, + SELECTED, +BOOL_END, + +// attributes with a string as value +STRING_START = BOOL_END, + ACCEPT = STRING_START, + ACCESSKEY, + ALT, + AXIS, + CHAR, // HTML3 Table Model Draft + CHARSET, + CLASS, + CODE, // HotJava + CODETYPE, + CONTENT, + COORDS, // Netscape 2.0 + ENCTYPE, + FACE, // IExplorer 2.0 + FRAMEBORDER, // IExplorer 3.0 + HTTPEQUIV, + LANGUAGE, // JavaScript + NAME, + PROMPT, + SHAPE, + STANDBY, + STYLE, // Style Sheets + TITLE, + VALUE, + SDVAL, // StarDiv NumberValue + SDNUM, // StarDiv NumberFormat + SDLIBRARY, + SDMODULE, +STRING_END, + +// attributes with an SGML identifier as value +SGMLID_START = STRING_END, + ID = SGMLID_START, + TARGET, // Netscape 2.0 + TO, +SGMLID_END, + +// attributes with a URI as value +URI_START = SGMLID_END, + ACTION = URI_START, + ARCHIVE, + BACKGROUND, + CLASSID, + CODEBASE, // HotJava + DATA, + HREF, + SCRIPT, + SRC, + USEMAP, // Netscape 2.0 +URI_END, + +// attributes with a color as value (all Netscape) +COLOR_START = URI_END, + ALINK = COLOR_START, + BGCOLOR, + BORDERCOLOR, // IExplorer 2.0 + BORDERCOLORLIGHT, // IExplorer 2.0 + BORDERCOLORDARK, // IExplorer 2.0 + COLOR, + LINK, + TEXT, + VLINK, +COLOR_END, + +// attributes with a numeric value +NUMBER_START = COLOR_END, + BORDER = NUMBER_START, + CELLSPACING, // HTML3 Table Model Draft + CELLPADDING, // HTML3 Table Model Draft + CHAROFF, // HTML3 Table Model Draft + COLSPAN, + FRAMESPACING, // IExplorer 3.0 + GUTTER, // Netscape 3.0b5 + HEIGHT, + HSPACE, // Netscape + LEFT, + LOOP, // IExplorer 2.0 + MARGINWIDTH, // Netscape 2.0 + MARGINHEIGHT, // Netscape 2.0 + MAXLENGTH, + ROWSPAN, + SCROLLAMOUNT, // IExplorer 2.0 + SCROLLDELAY, // IExplorer 2.0 + SPAN, // HTML3 Table Model Draft + TABINDEX, + VSPACE, // Netscape + WIDTH, + ZINDEX, +NUMBER_END, + +// attributes with Enum values +ENUM_START = NUMBER_END, + BEHAVIOR = ENUM_START, // IExplorer 2.0 + CLEAR, + DIR, + DIRECTION, // IExplorer 2.0 + FORMAT, + FRAME, // HTML3 Table Model Draft + LANG, + METHOD, + REL, + REV, + RULES, // HTML3 Table Model Draft + SCROLLING, // Netscape 2.0 + SDREADONLY, + SUBTYPE, + TYPE, + VALIGN, + VALUETYPE, + WRAP, + XML_SPACE, +ENUM_END, + +// attributes with script code as value +SCRIPT_START = ENUM_END, + ONABORT = SCRIPT_START, // JavaScript + ONBLUR, // JavaScript + ONCHANGE, // JavaScript + ONCLICK, // JavaScript + ONERROR, // JavaScript + ONFOCUS, // JavaScript + ONLOAD, // JavaScript + ONMOUSEOUT, // JavaScript + ONMOUSEOVER, // JavaScript + ONRESET, // JavaScript + ONSELECT, // JavaScript + ONSUBMIT, // JavaScript + ONUNLOAD, // JavaScript + + SDONABORT, // StarBasic + SDONBLUR, // StarBasic + SDONCHANGE, // StarBasic + SDONCLICK, // StarBasic + SDONERROR, // StarBasic + SDONFOCUS, // StarBasic + SDONLOAD, // StarBasic + SDONMOUSEOUT, // StarBasic + SDONMOUSEOVER, // StarBasic + SDONRESET, // StarBasic + SDONSELECT, // StarBasic + SDONSUBMIT, // StarBasic + SDONUNLOAD, // StarBasic +SCRIPT_END, + +// attributes with context dependent values +CONTEXT_START = SCRIPT_END, + ALIGN = CONTEXT_START, + COLS, // Netscape 2.0 vs HTML 2.0 + ROWS, // Netscape 2.0 vs HTML 2.0 + SIZE, + START, +CONTEXT_END, + +// an unknown option +UNKNOWN = CONTEXT_END, +END +}; + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |