diff options
Diffstat (limited to '')
-rw-r--r-- | include/orcus/css_parser.hpp | 883 |
1 files changed, 883 insertions, 0 deletions
diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp new file mode 100644 index 0000000..93bbc14 --- /dev/null +++ b/include/orcus/css_parser.hpp @@ -0,0 +1,883 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef INCLUDED_ORCUS_CSS_PARSER_HPP +#define INCLUDED_ORCUS_CSS_PARSER_HPP + +#define ORCUS_DEBUG_CSS 0 + +#include "parser_global.hpp" +#include "css_parser_base.hpp" + +#include <cassert> +#include <algorithm> + +#if ORCUS_DEBUG_CSS +#include <iostream> +using std::cout; +using std::endl; +#endif + +namespace orcus { + +/** + * Empty handler for CSS parser. Sub-class from it and implement necessary + * methods. + */ +class css_handler +{ +public: + /** + * Called upon encountering an at-rule. + * + * @param name name of the at-rule. + */ + void at_rule_name(std::string_view name) + { + (void)name; + } + + /** + * Called upon encountering a simple selector type. A simple selector may + * consist of + * + * @code{.txt} + * <type>.<class>#<id> + * @endcode + * + * and this function only passes the type part of the simple selector + * expression. + * + * @param type simple selector type. + */ + void simple_selector_type(std::string_view type) + { + (void)type; + } + + /** + * Called upon encountering a simple selector class. A simple selector may + * consist of + * + * @code{.txt} + * <type>.<class>#<id> + * @endcode + * + * and this function only passes the class part of the simple selector + * expression. + * + * @param cls simple selector class. + */ + void simple_selector_class(std::string_view cls) + { + (void)cls; + } + + /** + * Called upon encountering a pseudo element of a simple selector. For + * instance, given the following CSS block: + * + * @code{.css} + * p::first-line { + * color: blue; + * text-transform: uppercase; + * } + * @endcode + * + * the `first-line` part is the pseudo element of the selector named `p`. + * + * @param pe pseudo element of a simple selector. + */ + void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe) + { + (void)pe; + } + + /** + * Called upon encountering a pseudo class of a simple selector. For + * instance, given the following CSS block: + * + * @code{.css} + * button:hover { + * color: blue; + * } + * @endcode + * + * the `hover` part is the pseudo class of the selector named `button`. + * + * @param pc pseudo class of a simple selector. + */ + void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc) + { + (void)pc; + } + + /** + * Called upon encountering a simple selector id. A simple selector may + * consist of + * + * @code{.txt} + * <type>.<class>#<id> + * @endcode + * + * and this function only passes the id part of the simle selector + * expression. + * + * @param id simple selector id. + */ + void simple_selector_id(std::string_view id) + { + (void)id; + } + + /** + * Called at the end of a simple selector expression. + * + * @todo find out the difference between a simple selector and a selector, + * and document it. + */ + void end_simple_selector() {} + + /** + * Called at the end of a selector expression. + * + * @todo find out the difference between a simple selector and a selector, + * and document it. + */ + void end_selector() {} + + /** + * Calling upon encountering a combinator. A combinator is an operator that + * combines other selectors. Given the following CSS block: + * + * @code{.css} + * div > p { + * background-color: yellow; + * } + * @endcode + * + * the `>` is the combinator that combines the `div` and `p` selectors. + * + * @param combinator type of combinator encountered. + */ + void combinator(orcus::css::combinator_t combinator) + { + (void)combinator; + } + + /** + * Called at each property name. + * + * @param name property name string. + */ + void property_name(std::string_view name) + { + (void)name; + } + + /** + * Called at each ordinary property value string. + * + * @param value value string. + */ + void value(std::string_view value) + { + (void)value; + } + + /** + * Called at each RGB color value of a property. + * + * @param red value of red (0-255) + * @param green value of green (0-255) + * @param blue value of blue (0-255) + */ + void rgb(uint8_t red, uint8_t green, uint8_t blue) + { + (void)red; (void)green; (void)blue; + } + + /** + * Called at each RGB color value of a property with alpha transparency + * value. + * + * @param red value of red (0-255) + * @param green value of green (0-255) + * @param blue value of blue (0-255) + * @param alpha alpha transparency value + */ + void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha) + { + (void)red; (void)green; (void)blue; (void)alpha; + } + + /** + * Called at each HSL color value of a property. + * + * @param hue hue + * @param sat saturation + * @param light lightness + */ + void hsl(uint8_t hue, uint8_t sat, uint8_t light) + { + (void)hue; (void)sat; (void)light; + } + + /** + * Called at each HSL color value of a property with alpha transparency + * value. + * + * @param hue hue + * @param sat saturation + * @param light lightness + * @param alpha alpha value + */ + void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha) + { + (void)hue; (void)sat; (void)light; (void)alpha; + } + + /** + * Called at each URL value of a property. + * + * @param url URL value string. + */ + void url(std::string_view url) + { + (void)url; + } + + /** + * Called when the parsing begins. + */ + void begin_parse() {} + + /** + * Called when the parsing ends. + */ + void end_parse() {} + + /** + * Called at the beginning of each block. An opening brace '{' marks the + * beginning of a block. + */ + void begin_block() {} + + /** + * Called at the end of each block. A closing brace '}' marks the end of + * a block. + */ + void end_block() {} + + /** + * Called at the beginning of a single property expression. Each property + * expression may consist of + * + * @code{.txt} + * <name> : <value>, ..., <value> + * @endcode + * + * terminated by either a `;` or `}`. + */ + void begin_property() {} + + /** + * Called at the end of a single property expression. + */ + void end_property() {} +}; + +/** + * Parser for CSS documents. + * + * @tparam HandlerT Hanlder type with member functions for event callbacks. + * Refer to css_handler. + */ +template<typename HandlerT> +class css_parser : public css::parser_base +{ +public: + typedef HandlerT handler_type; + + css_parser(std::string_view content, handler_type& hdl); + void parse(); + +private: + // Handlers - at the time a handler is called the current position is + // expected to point to the first unprocessed non-blank character, and + // each handler must set the current position to the next unprocessed + // non-blank character when it finishes. + void rule(); + void at_rule_name(); + void simple_selector_name(); + void property_name(); + void property(); + void quoted_value(char c); + void value(); + void function_value(std::string_view v); + void function_rgb(bool alpha); + void function_hsl(bool alpha); + void function_url(); + void name_sep(); + void property_sep(); + void block(); + + handler_type& m_handler; +}; + +template<typename _Handler> +css_parser<_Handler>::css_parser(std::string_view content, handler_type& hdl) : + css::parser_base(content), m_handler(hdl) {} + +template<typename _Handler> +void css_parser<_Handler>::parse() +{ + shrink_stream(); + +#if ORCUS_DEBUG_CSS + std::cout << "compressed: '"; + const char* p = mp_char; + for (; p != mp_end; ++p) + std::cout << *p; + std::cout << "'" << std::endl; +#endif + m_handler.begin_parse(); + while (has_char()) + rule(); + m_handler.end_parse(); +} + +template<typename _Handler> +void css_parser<_Handler>::rule() +{ + // <selector name> , ... , <selector name> <block> + while (has_char()) + { + if (skip_comment()) + continue; + + char c = cur_char(); + if (is_alpha(c)) + { + simple_selector_name(); + continue; + } + + switch (c) + { + case '>': + set_combinator(c, css::combinator_t::direct_child); + break; + case '+': + set_combinator(c, css::combinator_t::next_sibling); + break; + case '.': + case '#': + case '@': + simple_selector_name(); + break; + case ',': + name_sep(); + break; + case '{': + reset_before_block(); + block(); + break; + default: + parse_error::throw_with("rule: failed to parse '", c, "'", offset()); + } + } +} + +template<typename _Handler> +void css_parser<_Handler>::at_rule_name() +{ + assert(has_char()); + assert(cur_char() == '@'); + next(); + char c = cur_char(); + if (!is_alpha(c)) + throw parse_error("at_rule_name: first character of an at-rule name must be an alphabet.", offset()); + + const char* p; + size_t len; + identifier(p, len); + skip_blanks(); + + m_handler.at_rule_name({p, len}); +#if ORCUS_DEBUG_CSS + std::string foo(p, len); + std::cout << "at-rule name: " << foo.c_str() << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::simple_selector_name() +{ + assert(has_char()); + char c = cur_char(); + if (c == '@') + { + // This is the name of an at-rule. + at_rule_name(); + return; + } + + if (m_simple_selector_count) + { +#if ORCUS_DEBUG_CSS + cout << "combinator: " << m_combinator << endl; +#endif + m_handler.combinator(m_combinator); + m_combinator = css::combinator_t::descendant; + } + assert(is_alpha(c) || c == '.' || c == '#'); + + const char* p = nullptr; + size_t n = 0; + +#if ORCUS_DEBUG_CSS + cout << "simple_selector_name: (" << m_simple_selector_count << ")"; +#endif + + if (c != '.' && c != '#') + { + identifier(p, n); +#if ORCUS_DEBUG_CSS + std::string s(p, n); + cout << " type=" << s; +#endif + m_handler.simple_selector_type({p, n}); + } + + bool in_loop = true; + while (in_loop && has_char()) + { + switch (cur_char()) + { + case '.': + { + next(); + identifier(p, n); + m_handler.simple_selector_class({p, n}); +#if ORCUS_DEBUG_CSS + std::string s(p, n); + std::cout << " class=" << s; +#endif + } + break; + case '#': + { + next(); + identifier(p, n); + m_handler.simple_selector_id({p, n}); +#if ORCUS_DEBUG_CSS + std::string s(p, n); + std::cout << " id=" << s; +#endif + } + break; + case ':': + { + // This could be either a pseudo element or pseudo class. + next(); + if (cur_char() == ':') + { + // pseudo element. + next(); + identifier(p, n); + css::pseudo_element_t elem = css::to_pseudo_element({p, n}); + if (!elem) + parse_error::throw_with( + "selector_name: unknown pseudo element '", {p, n}, "'", offset()); + + m_handler.simple_selector_pseudo_element(elem); + } + else + { + // pseudo class (or pseudo element in the older version of CSS). + identifier(p, n); + css::pseudo_class_t pc = css::to_pseudo_class({p, n}); + if (!pc) + parse_error::throw_with( + "selector_name: unknown pseudo class '", {p, n}, "'", offset()); + + m_handler.simple_selector_pseudo_class(pc); + } + } + break; + default: + in_loop = false; + } + } + + m_handler.end_simple_selector(); + skip_comments_and_blanks(); + + ++m_simple_selector_count; + +#if ORCUS_DEBUG_CSS + std::cout << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::property_name() +{ + // <identifier> + + assert(has_char()); + char c = cur_char(); + if (!is_alpha(c) && c != '.') + parse_error::throw_with( + "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'", offset()); + + const char* p; + size_t len; + identifier(p, len); + skip_comments_and_blanks(); + + m_handler.property_name({p, len}); +#if ORCUS_DEBUG_CSS + std::string foo(p, len); + std::cout << "property name: " << foo.c_str() << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::property() +{ + // <property name> : <value> , ... , <value> + + m_handler.begin_property(); + property_name(); + if (cur_char() != ':') + throw parse_error("property: ':' expected.", offset()); + next(); + skip_comments_and_blanks(); + + bool in_loop = true; + while (in_loop && has_char()) + { + value(); + char c = cur_char(); + switch (c) + { + case ',': + { + // separated by commas. + next(); + skip_comments_and_blanks(); + } + break; + case ';': + case '}': + in_loop = false; + break; + default: + ; + } + } + + skip_comments_and_blanks(); + m_handler.end_property(); +} + +template<typename _Handler> +void css_parser<_Handler>::quoted_value(char c) +{ + // Parse until the the end quote is reached. + const char* p = nullptr; + size_t len = 0; + literal(p, len, c); + next(); + skip_blanks(); + + m_handler.value({p, len}); +#if ORCUS_DEBUG_CSS + std::string foo(p, len); + std::cout << "quoted value: " << foo.c_str() << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::value() +{ + assert(has_char()); + char c = cur_char(); + if (c == '"' || c == '\'') + { + quoted_value(c); + return; + } + + std::string_view v = parse_value(); + if (v.empty()) + return; + + if (cur_char() == '(') + { + function_value(v); + return; + } + + m_handler.value(v); + + skip_comments_and_blanks(); + +#if ORCUS_DEBUG_CSS + std::cout << "value: " << v << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::function_value(std::string_view v) +{ + assert(cur_char() == '('); + css::property_function_t func = css::to_property_function(v); + if (func == css::property_function_t::unknown) + parse_error::throw_with("function_value: unknown function '", v, "'", offset()); + + // Move to the first character of the first argument. + next(); + skip_comments_and_blanks(); + + switch (func) + { + case css::property_function_t::rgb: + function_rgb(false); + break; + case css::property_function_t::rgba: + function_rgb(true); + break; + case css::property_function_t::hsl: + function_hsl(false); + break; + case css::property_function_t::hsla: + function_hsl(true); + break; + case css::property_function_t::url: + function_url(); + break; + default: + parse_error::throw_with("function_value: unhandled function '", v, "'", offset()); + } + + char c = cur_char(); + if (c != ')') + parse_error::throw_with("function_value: ')' expected but '", c, "' found.", offset()); + + next(); + skip_comments_and_blanks(); +} + +template<typename _Handler> +void css_parser<_Handler>::function_rgb(bool alpha) +{ + // rgb(num, num, num) rgba(num, num, num, float) + + uint8_t vals[3]; + uint8_t* p = vals; + const uint8_t* plast = p + 2; + char c = 0; + + for (; ; ++p) + { + *p = parse_uint8(); + + skip_comments_and_blanks(); + + if (p == plast) + break; + + c = cur_char(); + + if (c != ',') + parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset()); + + next(); + skip_comments_and_blanks(); + } + + if (alpha) + { + c = cur_char(); + if (c != ',') + parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset()); + + next(); + skip_comments_and_blanks(); + + double alpha_val = parse_double_or_throw(); + + alpha_val = std::clamp(alpha_val, 0.0, 1.0); + m_handler.rgba(vals[0], vals[1], vals[2], alpha_val); + } + else + m_handler.rgb(vals[0], vals[1], vals[2]); + +#if ORCUS_DEBUG_CSS + std::cout << "rgb"; + if (alpha) + std::cout << 'a'; + std::cout << '('; + p = vals; + const uint8_t* pend = plast + 1; + for (; p != pend; ++p) + std::cout << ' ' << (int)*p; + std::cout << " )" << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::function_hsl(bool alpha) +{ + // hsl(num, percent, percent) hsla(num, percent, percent, float) + + double hue = parse_double_or_throw(); // casted to uint8_t eventually. + hue = std::clamp(hue, 0.0, 360.0); + skip_comments_and_blanks(); + + char c = cur_char(); + if (c != ',') + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); + + next(); + skip_comments_and_blanks(); + + double sat = parse_percent(); + sat = std::clamp(sat, 0.0, 100.0); + skip_comments_and_blanks(); + + c = cur_char(); + if (c != ',') + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); + + next(); + skip_comments_and_blanks(); + + double light = parse_percent(); + light = std::clamp(light, 0.0, 100.0); + skip_comments_and_blanks(); + + if (!alpha) + { + m_handler.hsl(hue, sat, light); + return; + } + + c = cur_char(); + if (c != ',') + parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset()); + + next(); + skip_comments_and_blanks(); + + double alpha_val = parse_double_or_throw(); + alpha_val = std::clamp(alpha_val, 0.0, 1.0); + skip_comments_and_blanks(); + m_handler.hsla(hue, sat, light, alpha_val); +} + +template<typename _Handler> +void css_parser<_Handler>::function_url() +{ + char c = cur_char(); + + if (c == '"' || c == '\'') + { + // Quoted URL value. + const char* p; + size_t len; + literal(p, len, c); + next(); + skip_comments_and_blanks(); + m_handler.url({p, len}); +#if ORCUS_DEBUG_CSS + std::cout << "url(" << std::string(p, len) << ")" << std::endl; +#endif + return; + } + + // Unquoted URL value. + const char* p; + size_t len; + skip_to_or_blank(p, len, ")"); + skip_comments_and_blanks(); + m_handler.url({p, len}); +#if ORCUS_DEBUG_CSS + std::cout << "url(" << std::string(p, len) << ")" << std::endl; +#endif +} + +template<typename _Handler> +void css_parser<_Handler>::name_sep() +{ + assert(cur_char() == ','); +#if ORCUS_DEBUG_CSS + std::cout << "," << std::endl; +#endif + next(); + skip_blanks(); + m_handler.end_selector(); +} + +template<typename _Handler> +void css_parser<_Handler>::property_sep() +{ +#if ORCUS_DEBUG_CSS + std::cout << ";" << std::endl; +#endif + next(); + skip_comments_and_blanks(); +} + +template<typename _Handler> +void css_parser<_Handler>::block() +{ + // '{' <property> ';' ... ';' <property> ';'(optional) '}' + + assert(cur_char() == '{'); +#if ORCUS_DEBUG_CSS + std::cout << "{" << std::endl; +#endif + m_handler.end_selector(); + m_handler.begin_block(); + + next(); + skip_comments_and_blanks(); + + // parse properties. + while (has_char()) + { + property(); + if (cur_char() != ';') + break; + property_sep(); + if (cur_char() == '}') + // ';' after the last property. This is optional but allowed. + break; + } + + if (cur_char() != '}') + throw parse_error("block: '}' expected.", offset()); + + m_handler.end_block(); + + next(); + skip_comments_and_blanks(); + +#if ORCUS_DEBUG_CSS + std::cout << "}" << std::endl; +#endif +} + +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |