diff options
Diffstat (limited to 'src/libserver/css')
23 files changed, 5622 insertions, 0 deletions
# src/libserver/css/CMakeLists.txt
#
# List of the CSS library translation units. The list is published to the
# including directory through PARENT_SCOPE.
set(LIBCSSSRC
    "${CMAKE_CURRENT_SOURCE_DIR}/css.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_property.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_value.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_selector.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_tokeniser.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_util.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_rule.cxx"
    "${CMAKE_CURRENT_SOURCE_DIR}/css_parser.cxx"
    PARENT_SCOPE)
\ No newline at end of file diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx new file mode 100644 index 0000000..1b369ed --- /dev/null +++ b/src/libserver/css/css.cxx @@ -0,0 +1,227 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css.hxx" +#include "contrib/ankerl/unordered_dense.h" +#include "css_parser.hxx" +#include "libserver/html/html_tag.hxx" +#include "libserver/html/html_block.hxx" + +/* Keep unit tests implementation here (it'll possibly be moved outside one day) */ +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#define DOCTEST_CONFIG_IMPLEMENT +#include "doctest/doctest.h" + +namespace rspamd::css { + +INIT_LOG_MODULE_PUBLIC(css); + +class css_style_sheet::impl { +public: + using sel_shared_hash = smart_ptr_hash<css_selector>; + using sel_shared_eq = smart_ptr_equal<css_selector>; + using selector_ptr = std::unique_ptr<css_selector>; + using selectors_hash = ankerl::unordered_dense::map<selector_ptr, css_declarations_block_ptr, + sel_shared_hash, sel_shared_eq>; + using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>; + selectors_hash tags_selector; + selectors_hash class_selectors; + selectors_hash id_selectors; + std::optional<universal_selector_t> universal_selector; +}; + +css_style_sheet::css_style_sheet(rspamd_mempool_t *pool) + : pool(pool), pimpl(new impl) +{ +} +css_style_sheet::~css_style_sheet() +{ +} + +auto 
css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector, + css_declarations_block_ptr decls) -> void +{ + impl::selectors_hash *target_hash = nullptr; + + switch (selector->type) { + case css_selector::selector_type::SELECTOR_ALL: + if (pimpl->universal_selector) { + /* Another universal selector */ + msg_debug_css("redefined universal selector, merging rules"); + pimpl->universal_selector->second->merge_block(*decls); + } + else { + msg_debug_css("added universal selector"); + pimpl->universal_selector = std::make_pair(std::move(selector), + decls); + } + break; + case css_selector::selector_type::SELECTOR_CLASS: + target_hash = &pimpl->class_selectors; + break; + case css_selector::selector_type::SELECTOR_ID: + target_hash = &pimpl->id_selectors; + break; + case css_selector::selector_type::SELECTOR_TAG: + target_hash = &pimpl->tags_selector; + break; + } + + if (target_hash) { + auto found_it = target_hash->find(selector); + + if (found_it == target_hash->end()) { + /* Easy case, new element */ + target_hash->insert({std::move(selector), decls}); + } + else { + /* The problem with merging is actually in how to handle selectors chains + * For example, we have 2 selectors: + * 1. class id tag -> meaning that we first match class, then we ensure that + * id is also the same and finally we check the tag + * 2. tag class id -> it means that we check first tag, then class and then id + * So we have somehow equal path in the xpath terms. + * I suppose now, that we merely check parent stuff and handle duplicates + * merging when finally resolving paths. 
+ */ + auto sel_str = selector->to_string().value_or("unknown"); + msg_debug_css("found duplicate selector: %*s", (int) sel_str.size(), + sel_str.data()); + found_it->second->merge_block(*decls); + } + } +} + +auto css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block * +{ + std::optional<std::string_view> id_comp, class_comp; + rspamd::html::html_block *res = nullptr; + + if (!tag) { + return nullptr; + } + + /* First, find id in a tag and a class */ + for (const auto ¶m: tag->components) { + if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) { + id_comp = param.value; + } + else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) { + class_comp = param.value; + } + } + + /* ID part */ + if (id_comp && !pimpl->id_selectors.empty()) { + auto found_id_sel = pimpl->id_selectors.find(css_selector{id_comp.value()}); + + if (found_id_sel != pimpl->id_selectors.end()) { + const auto &decl = *(found_id_sel->second); + res = decl.compile_to_block(pool); + } + } + + /* Class part */ + if (class_comp && !pimpl->class_selectors.empty()) { + auto sv_split = [](auto strv, std::string_view delims = " ") -> std::vector<std::string_view> { + std::vector<decltype(strv)> ret; + std::size_t start = 0; + + while (start < strv.size()) { + const auto last = strv.find_first_of(delims, start); + if (start != last) { + ret.emplace_back(strv.substr(start, last - start)); + } + + if (last == std::string_view::npos) { + break; + } + + start = last + 1; + } + + return ret; + }; + + auto elts = sv_split(class_comp.value()); + + for (const auto &e: elts) { + auto found_class_sel = pimpl->class_selectors.find( + css_selector{e, css_selector::selector_type::SELECTOR_CLASS}); + + if (found_class_sel != pimpl->class_selectors.end()) { + const auto &decl = *(found_class_sel->second); + auto *tmp = decl.compile_to_block(pool); + + if (res == nullptr) { + res = tmp; + } + else { + res->propagate_block(*tmp); + } + } + } 
+ } + + /* Tags part */ + if (!pimpl->tags_selector.empty()) { + auto found_tag_sel = pimpl->tags_selector.find( + css_selector{static_cast<tag_id_t>(tag->id)}); + + if (found_tag_sel != pimpl->tags_selector.end()) { + const auto &decl = *(found_tag_sel->second); + auto *tmp = decl.compile_to_block(pool); + + if (res == nullptr) { + res = tmp; + } + else { + res->propagate_block(*tmp); + } + } + } + + /* Finally, universal selector */ + if (pimpl->universal_selector) { + auto *tmp = pimpl->universal_selector->second->compile_to_block(pool); + + if (res == nullptr) { + res = tmp; + } + else { + res->propagate_block(*tmp); + } + } + + return res; +} + +auto css_parse_style(rspamd_mempool_t *pool, + std::string_view input, + std::shared_ptr<css_style_sheet> &&existing) + -> css_return_pair +{ + auto parse_res = rspamd::css::parse_css(pool, input, + std::forward<std::shared_ptr<css_style_sheet>>(existing)); + + if (parse_res.has_value()) { + return std::make_pair(parse_res.value(), css_parse_error()); + } + + return std::make_pair(nullptr, parse_res.error()); +} + +}// namespace rspamd::css
\ No newline at end of file diff --git a/src/libserver/css/css.hxx b/src/libserver/css/css.hxx new file mode 100644 index 0000000..f0f8120 --- /dev/null +++ b/src/libserver/css/css.hxx @@ -0,0 +1,68 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#ifndef RSPAMD_CSS_HXX +#define RSPAMD_CSS_HXX + +#include <string> +#include <memory> +#include "logger.h" +#include "css_rule.hxx" +#include "css_selector.hxx" + +namespace rspamd::html { +/* Forward declaration */ +struct html_tag; +struct html_block; +}// namespace rspamd::html + +namespace rspamd::css { + +extern int rspamd_css_log_id; + +#define msg_debug_css(...) rspamd_conditional_debug_fast(NULL, NULL, \ + rspamd_css_log_id, "css", pool->tag.uid, \ + __FUNCTION__, \ + __VA_ARGS__) +#define msg_err_css(...) 
rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \ + "css", pool->tag.uid, \ + __FUNCTION__, \ + __VA_ARGS__) + +class css_style_sheet { +public: + css_style_sheet(rspamd_mempool_t *pool); + ~css_style_sheet(); /* must be declared separately due to pimpl */ + auto add_selector_rule(std::unique_ptr<css_selector> &&selector, + css_declarations_block_ptr decls) -> void; + + auto check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block *; + +private: + class impl; + rspamd_mempool_t *pool; + std::unique_ptr<impl> pimpl; +}; + +using css_return_pair = std::pair<std::shared_ptr<css_style_sheet>, css_parse_error>; +auto css_parse_style(rspamd_mempool_t *pool, + std::string_view input, + std::shared_ptr<css_style_sheet> &&existing) -> css_return_pair; + +}// namespace rspamd::css + +#endif//RSPAMD_CSS_H
\ No newline at end of file diff --git a/src/libserver/css/css_colors_list.hxx b/src/libserver/css/css_colors_list.hxx new file mode 100644 index 0000000..6dfe54f --- /dev/null +++ b/src/libserver/css/css_colors_list.hxx @@ -0,0 +1,738 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSPAMD_CSS_COLORS_LIST_HXX +#define RSPAMD_CSS_COLORS_LIST_HXX + +#pragma once + +#include <string_view> +#include "contrib/ankerl/unordered_dense.h" +#include "css_value.hxx" + +namespace rspamd::css { + +/* + * List of all colors, intended to use with hashes/sets + * TODO: think about frozen structs when we can deal with 700 values without + * compiler limits... 
+ */ +static const ankerl::unordered_dense::map<std::string_view, css_color> css_colors_map{ + {"aliceblue", {240, 248, 255}}, + {"antiquewhite", {250, 235, 215}}, + {"antiquewhite1", {255, 239, 219}}, + {"antiquewhite2", {238, 223, 204}}, + {"antiquewhite3", {205, 192, 176}}, + {"antiquewhite4", {139, 131, 120}}, + {"aqua", {0, 255, 255}}, + {"aquamarine", {127, 255, 212}}, + {"aquamarine1", {127, 255, 212}}, + {"aquamarine2", {118, 238, 198}}, + {"aquamarine3", {102, 205, 170}}, + {"aquamarine4", {69, 139, 116}}, + {"azure", {240, 255, 255}}, + {"azure1", {240, 255, 255}}, + {"azure2", {224, 238, 238}}, + {"azure3", {193, 205, 205}}, + {"azure4", {131, 139, 139}}, + {"beige", {245, 245, 220}}, + {"bisque", {255, 228, 196}}, + {"bisque1", {255, 228, 196}}, + {"bisque2", {238, 213, 183}}, + {"bisque3", {205, 183, 158}}, + {"bisque4", {139, 125, 107}}, + {"black", {0, 0, 0}}, + {"blanchedalmond", {255, 235, 205}}, + {"blue", {0, 0, 255}}, + {"blue1", {0, 0, 255}}, + {"blue2", {0, 0, 238}}, + {"blue3", {0, 0, 205}}, + {"blue4", {0, 0, 139}}, + {"blueviolet", {138, 43, 226}}, + {"brown", {165, 42, 42}}, + {"brown1", {255, 64, 64}}, + {"brown2", {238, 59, 59}}, + {"brown3", {205, 51, 51}}, + {"brown4", {139, 35, 35}}, + {"burlywood", {222, 184, 135}}, + {"burlywood1", {255, 211, 155}}, + {"burlywood2", {238, 197, 145}}, + {"burlywood3", {205, 170, 125}}, + {"burlywood4", {139, 115, 85}}, + {"cadetblue", {95, 158, 160}}, + {"cadetblue1", {152, 245, 255}}, + {"cadetblue2", {142, 229, 238}}, + {"cadetblue3", {122, 197, 205}}, + {"cadetblue4", {83, 134, 139}}, + {"chartreuse", {127, 255, 0}}, + {"chartreuse1", {127, 255, 0}}, + {"chartreuse2", {118, 238, 0}}, + {"chartreuse3", {102, 205, 0}}, + {"chartreuse4", {69, 139, 0}}, + {"chocolate", {210, 105, 30}}, + {"chocolate1", {255, 127, 36}}, + {"chocolate2", {238, 118, 33}}, + {"chocolate3", {205, 102, 29}}, + {"chocolate4", {139, 69, 19}}, + {"coral", {255, 127, 80}}, + {"coral1", {255, 114, 86}}, + {"coral2", {238, 106, 
80}}, + {"coral3", {205, 91, 69}}, + {"coral4", {139, 62, 47}}, + {"cornflowerblue", {100, 149, 237}}, + {"cornsilk", {255, 248, 220}}, + {"cornsilk1", {255, 248, 220}}, + {"cornsilk2", {238, 232, 205}}, + {"cornsilk3", {205, 200, 177}}, + {"cornsilk4", {139, 136, 120}}, + {"crimson", {220, 20, 60}}, + {"cyan", {0, 255, 255}}, + {"cyan1", {0, 255, 255}}, + {"cyan2", {0, 238, 238}}, + {"cyan3", {0, 205, 205}}, + {"cyan4", {0, 139, 139}}, + {"darkblue", {0, 0, 139}}, + {"darkcyan", {0, 139, 139}}, + {"darkgoldenrod", {184, 134, 11}}, + {"darkgoldenrod1", {255, 185, 15}}, + {"darkgoldenrod2", {238, 173, 14}}, + {"darkgoldenrod3", {205, 149, 12}}, + {"darkgoldenrod4", {139, 101, 8}}, + {"darkgray", {169, 169, 169}}, + {"darkgreen", {0, 100, 0}}, + {"darkgrey", {169, 169, 169}}, + {"darkkhaki", {189, 183, 107}}, + {"darkmagenta", {139, 0, 139}}, + {"darkolivegreen", {85, 107, 47}}, + {"darkolivegreen1", {202, 255, 112}}, + {"darkolivegreen2", {188, 238, 104}}, + {"darkolivegreen3", {162, 205, 90}}, + {"darkolivegreen4", {110, 139, 61}}, + {"darkorange", {255, 140, 0}}, + {"darkorange1", {255, 127, 0}}, + {"darkorange2", {238, 118, 0}}, + {"darkorange3", {205, 102, 0}}, + {"darkorange4", {139, 69, 0}}, + {"darkorchid", {153, 50, 204}}, + {"darkorchid1", {191, 62, 255}}, + {"darkorchid2", {178, 58, 238}}, + {"darkorchid3", {154, 50, 205}}, + {"darkorchid4", {104, 34, 139}}, + {"darkred", {139, 0, 0}}, + {"darksalmon", {233, 150, 122}}, + {"darkseagreen", {143, 188, 143}}, + {"darkseagreen1", {193, 255, 193}}, + {"darkseagreen2", {180, 238, 180}}, + {"darkseagreen3", {155, 205, 155}}, + {"darkseagreen4", {105, 139, 105}}, + {"darkslateblue", {72, 61, 139}}, + {"darkslategray", {47, 79, 79}}, + {"darkslategray1", {151, 255, 255}}, + {"darkslategray2", {141, 238, 238}}, + {"darkslategray3", {121, 205, 205}}, + {"darkslategray4", {82, 139, 139}}, + {"darkslategrey", {47, 79, 79}}, + {"darkturquoise", {0, 206, 209}}, + {"darkviolet", {148, 0, 211}}, + {"deeppink", {255, 20, 
147}}, + {"deeppink1", {255, 20, 147}}, + {"deeppink2", {238, 18, 137}}, + {"deeppink3", {205, 16, 118}}, + {"deeppink4", {139, 10, 80}}, + {"deepskyblue", {0, 191, 255}}, + {"deepskyblue1", {0, 191, 255}}, + {"deepskyblue2", {0, 178, 238}}, + {"deepskyblue3", {0, 154, 205}}, + {"deepskyblue4", {0, 104, 139}}, + {"dimgray", {105, 105, 105}}, + {"dimgrey", {105, 105, 105}}, + {"dodgerblue", {30, 144, 255}}, + {"dodgerblue1", {30, 144, 255}}, + {"dodgerblue2", {28, 134, 238}}, + {"dodgerblue3", {24, 116, 205}}, + {"dodgerblue4", {16, 78, 139}}, + {"firebrick", {178, 34, 34}}, + {"firebrick1", {255, 48, 48}}, + {"firebrick2", {238, 44, 44}}, + {"firebrick3", {205, 38, 38}}, + {"firebrick4", {139, 26, 26}}, + {"floralwhite", {255, 250, 240}}, + {"forestgreen", {34, 139, 34}}, + {"fuchsia", {255, 0, 255}}, + {"gainsboro", {220, 220, 220}}, + {"ghostwhite", {248, 248, 255}}, + {"gold", {255, 215, 0}}, + {"gold1", {255, 215, 0}}, + {"gold2", {238, 201, 0}}, + {"gold3", {205, 173, 0}}, + {"gold4", {139, 117, 0}}, + {"goldenrod", {218, 165, 32}}, + {"goldenrod1", {255, 193, 37}}, + {"goldenrod2", {238, 180, 34}}, + {"goldenrod3", {205, 155, 29}}, + {"goldenrod4", {139, 105, 20}}, + {"gray", {190, 190, 190}}, + {"gray0", {0, 0, 0}}, + {"gray1", {3, 3, 3}}, + {"gray10", {26, 26, 26}}, + {"gray100", {255, 255, 255}}, + {"gray11", {28, 28, 28}}, + {"gray12", {31, 31, 31}}, + {"gray13", {33, 33, 33}}, + {"gray14", {36, 36, 36}}, + {"gray15", {38, 38, 38}}, + {"gray16", {41, 41, 41}}, + {"gray17", {43, 43, 43}}, + {"gray18", {46, 46, 46}}, + {"gray19", {48, 48, 48}}, + {"gray2", {5, 5, 5}}, + {"gray20", {51, 51, 51}}, + {"gray21", {54, 54, 54}}, + {"gray22", {56, 56, 56}}, + {"gray23", {59, 59, 59}}, + {"gray24", {61, 61, 61}}, + {"gray25", {64, 64, 64}}, + {"gray26", {66, 66, 66}}, + {"gray27", {69, 69, 69}}, + {"gray28", {71, 71, 71}}, + {"gray29", {74, 74, 74}}, + {"gray3", {8, 8, 8}}, + {"gray30", {77, 77, 77}}, + {"gray31", {79, 79, 79}}, + {"gray32", {82, 82, 82}}, + 
{"gray33", {84, 84, 84}}, + {"gray34", {87, 87, 87}}, + {"gray35", {89, 89, 89}}, + {"gray36", {92, 92, 92}}, + {"gray37", {94, 94, 94}}, + {"gray38", {97, 97, 97}}, + {"gray39", {99, 99, 99}}, + {"gray4", {10, 10, 10}}, + {"gray40", {102, 102, 102}}, + {"gray41", {105, 105, 105}}, + {"gray42", {107, 107, 107}}, + {"gray43", {110, 110, 110}}, + {"gray44", {112, 112, 112}}, + {"gray45", {115, 115, 115}}, + {"gray46", {117, 117, 117}}, + {"gray47", {120, 120, 120}}, + {"gray48", {122, 122, 122}}, + {"gray49", {125, 125, 125}}, + {"gray5", {13, 13, 13}}, + {"gray50", {127, 127, 127}}, + {"gray51", {130, 130, 130}}, + {"gray52", {133, 133, 133}}, + {"gray53", {135, 135, 135}}, + {"gray54", {138, 138, 138}}, + {"gray55", {140, 140, 140}}, + {"gray56", {143, 143, 143}}, + {"gray57", {145, 145, 145}}, + {"gray58", {148, 148, 148}}, + {"gray59", {150, 150, 150}}, + {"gray6", {15, 15, 15}}, + {"gray60", {153, 153, 153}}, + {"gray61", {156, 156, 156}}, + {"gray62", {158, 158, 158}}, + {"gray63", {161, 161, 161}}, + {"gray64", {163, 163, 163}}, + {"gray65", {166, 166, 166}}, + {"gray66", {168, 168, 168}}, + {"gray67", {171, 171, 171}}, + {"gray68", {173, 173, 173}}, + {"gray69", {176, 176, 176}}, + {"gray7", {18, 18, 18}}, + {"gray70", {179, 179, 179}}, + {"gray71", {181, 181, 181}}, + {"gray72", {184, 184, 184}}, + {"gray73", {186, 186, 186}}, + {"gray74", {189, 189, 189}}, + {"gray75", {191, 191, 191}}, + {"gray76", {194, 194, 194}}, + {"gray77", {196, 196, 196}}, + {"gray78", {199, 199, 199}}, + {"gray79", {201, 201, 201}}, + {"gray8", {20, 20, 20}}, + {"gray80", {204, 204, 204}}, + {"gray81", {207, 207, 207}}, + {"gray82", {209, 209, 209}}, + {"gray83", {212, 212, 212}}, + {"gray84", {214, 214, 214}}, + {"gray85", {217, 217, 217}}, + {"gray86", {219, 219, 219}}, + {"gray87", {222, 222, 222}}, + {"gray88", {224, 224, 224}}, + {"gray89", {227, 227, 227}}, + {"gray9", {23, 23, 23}}, + {"gray90", {229, 229, 229}}, + {"gray91", {232, 232, 232}}, + {"gray92", {235, 235, 235}}, 
+ {"gray93", {237, 237, 237}}, + {"gray94", {240, 240, 240}}, + {"gray95", {242, 242, 242}}, + {"gray96", {245, 245, 245}}, + {"gray97", {247, 247, 247}}, + {"gray98", {250, 250, 250}}, + {"gray99", {252, 252, 252}}, + {"green", {0, 255, 0}}, + {"green1", {0, 255, 0}}, + {"green2", {0, 238, 0}}, + {"green3", {0, 205, 0}}, + {"green4", {0, 139, 0}}, + {"greenyellow", {173, 255, 47}}, + {"grey", {190, 190, 190}}, + {"grey0", {0, 0, 0}}, + {"grey1", {3, 3, 3}}, + {"grey10", {26, 26, 26}}, + {"grey100", {255, 255, 255}}, + {"grey11", {28, 28, 28}}, + {"grey12", {31, 31, 31}}, + {"grey13", {33, 33, 33}}, + {"grey14", {36, 36, 36}}, + {"grey15", {38, 38, 38}}, + {"grey16", {41, 41, 41}}, + {"grey17", {43, 43, 43}}, + {"grey18", {46, 46, 46}}, + {"grey19", {48, 48, 48}}, + {"grey2", {5, 5, 5}}, + {"grey20", {51, 51, 51}}, + {"grey21", {54, 54, 54}}, + {"grey22", {56, 56, 56}}, + {"grey23", {59, 59, 59}}, + {"grey24", {61, 61, 61}}, + {"grey25", {64, 64, 64}}, + {"grey26", {66, 66, 66}}, + {"grey27", {69, 69, 69}}, + {"grey28", {71, 71, 71}}, + {"grey29", {74, 74, 74}}, + {"grey3", {8, 8, 8}}, + {"grey30", {77, 77, 77}}, + {"grey31", {79, 79, 79}}, + {"grey32", {82, 82, 82}}, + {"grey33", {84, 84, 84}}, + {"grey34", {87, 87, 87}}, + {"grey35", {89, 89, 89}}, + {"grey36", {92, 92, 92}}, + {"grey37", {94, 94, 94}}, + {"grey38", {97, 97, 97}}, + {"grey39", {99, 99, 99}}, + {"grey4", {10, 10, 10}}, + {"grey40", {102, 102, 102}}, + {"grey41", {105, 105, 105}}, + {"grey42", {107, 107, 107}}, + {"grey43", {110, 110, 110}}, + {"grey44", {112, 112, 112}}, + {"grey45", {115, 115, 115}}, + {"grey46", {117, 117, 117}}, + {"grey47", {120, 120, 120}}, + {"grey48", {122, 122, 122}}, + {"grey49", {125, 125, 125}}, + {"grey5", {13, 13, 13}}, + {"grey50", {127, 127, 127}}, + {"grey51", {130, 130, 130}}, + {"grey52", {133, 133, 133}}, + {"grey53", {135, 135, 135}}, + {"grey54", {138, 138, 138}}, + {"grey55", {140, 140, 140}}, + {"grey56", {143, 143, 143}}, + {"grey57", {145, 145, 145}}, + 
{"grey58", {148, 148, 148}}, + {"grey59", {150, 150, 150}}, + {"grey6", {15, 15, 15}}, + {"grey60", {153, 153, 153}}, + {"grey61", {156, 156, 156}}, + {"grey62", {158, 158, 158}}, + {"grey63", {161, 161, 161}}, + {"grey64", {163, 163, 163}}, + {"grey65", {166, 166, 166}}, + {"grey66", {168, 168, 168}}, + {"grey67", {171, 171, 171}}, + {"grey68", {173, 173, 173}}, + {"grey69", {176, 176, 176}}, + {"grey7", {18, 18, 18}}, + {"grey70", {179, 179, 179}}, + {"grey71", {181, 181, 181}}, + {"grey72", {184, 184, 184}}, + {"grey73", {186, 186, 186}}, + {"grey74", {189, 189, 189}}, + {"grey75", {191, 191, 191}}, + {"grey76", {194, 194, 194}}, + {"grey77", {196, 196, 196}}, + {"grey78", {199, 199, 199}}, + {"grey79", {201, 201, 201}}, + {"grey8", {20, 20, 20}}, + {"grey80", {204, 204, 204}}, + {"grey81", {207, 207, 207}}, + {"grey82", {209, 209, 209}}, + {"grey83", {212, 212, 212}}, + {"grey84", {214, 214, 214}}, + {"grey85", {217, 217, 217}}, + {"grey86", {219, 219, 219}}, + {"grey87", {222, 222, 222}}, + {"grey88", {224, 224, 224}}, + {"grey89", {227, 227, 227}}, + {"grey9", {23, 23, 23}}, + {"grey90", {229, 229, 229}}, + {"grey91", {232, 232, 232}}, + {"grey92", {235, 235, 235}}, + {"grey93", {237, 237, 237}}, + {"grey94", {240, 240, 240}}, + {"grey95", {242, 242, 242}}, + {"grey96", {245, 245, 245}}, + {"grey97", {247, 247, 247}}, + {"grey98", {250, 250, 250}}, + {"grey99", {252, 252, 252}}, + {"honeydew", {240, 255, 240}}, + {"honeydew1", {240, 255, 240}}, + {"honeydew2", {224, 238, 224}}, + {"honeydew3", {193, 205, 193}}, + {"honeydew4", {131, 139, 131}}, + {"hotpink", {255, 105, 180}}, + {"hotpink1", {255, 110, 180}}, + {"hotpink2", {238, 106, 167}}, + {"hotpink3", {205, 96, 144}}, + {"hotpink4", {139, 58, 98}}, + {"indianred", {205, 92, 92}}, + {"indianred1", {255, 106, 106}}, + {"indianred2", {238, 99, 99}}, + {"indianred3", {205, 85, 85}}, + {"indianred4", {139, 58, 58}}, + {"indigo", {75, 0, 130}}, + {"ivory", {255, 255, 240}}, + {"ivory1", {255, 255, 240}}, + 
{"ivory2", {238, 238, 224}}, + {"ivory3", {205, 205, 193}}, + {"ivory4", {139, 139, 131}}, + {"khaki", {240, 230, 140}}, + {"khaki1", {255, 246, 143}}, + {"khaki2", {238, 230, 133}}, + {"khaki3", {205, 198, 115}}, + {"khaki4", {139, 134, 78}}, + {"lavender", {230, 230, 250}}, + {"lavenderblush", {255, 240, 245}}, + {"lavenderblush1", {255, 240, 245}}, + {"lavenderblush2", {238, 224, 229}}, + {"lavenderblush3", {205, 193, 197}}, + {"lavenderblush4", {139, 131, 134}}, + {"lawngreen", {124, 252, 0}}, + {"lemonchiffon", {255, 250, 205}}, + {"lemonchiffon1", {255, 250, 205}}, + {"lemonchiffon2", {238, 233, 191}}, + {"lemonchiffon3", {205, 201, 165}}, + {"lemonchiffon4", {139, 137, 112}}, + {"lightblue", {173, 216, 230}}, + {"lightblue1", {191, 239, 255}}, + {"lightblue2", {178, 223, 238}}, + {"lightblue3", {154, 192, 205}}, + {"lightblue4", {104, 131, 139}}, + {"lightcoral", {240, 128, 128}}, + {"lightcyan", {224, 255, 255}}, + {"lightcyan1", {224, 255, 255}}, + {"lightcyan2", {209, 238, 238}}, + {"lightcyan3", {180, 205, 205}}, + {"lightcyan4", {122, 139, 139}}, + {"lightgoldenrod", {238, 221, 130}}, + {"lightgoldenrod1", {255, 236, 139}}, + {"lightgoldenrod2", {238, 220, 130}}, + {"lightgoldenrod3", {205, 190, 112}}, + {"lightgoldenrod4", {139, 129, 76}}, + {"lightgoldenrodyellow", {250, 250, 210}}, + {"lightgray", {211, 211, 211}}, + {"lightgreen", {144, 238, 144}}, + {"lightgrey", {211, 211, 211}}, + {"lightpink", {255, 182, 193}}, + {"lightpink1", {255, 174, 185}}, + {"lightpink2", {238, 162, 173}}, + {"lightpink3", {205, 140, 149}}, + {"lightpink4", {139, 95, 101}}, + {"lightsalmon", {255, 160, 122}}, + {"lightsalmon1", {255, 160, 122}}, + {"lightsalmon2", {238, 149, 114}}, + {"lightsalmon3", {205, 129, 98}}, + {"lightsalmon4", {139, 87, 66}}, + {"lightseagreen", {32, 178, 170}}, + {"lightskyblue", {135, 206, 250}}, + {"lightskyblue1", {176, 226, 255}}, + {"lightskyblue2", {164, 211, 238}}, + {"lightskyblue3", {141, 182, 205}}, + {"lightskyblue4", {96, 123, 139}}, 
+ {"lightslateblue", {132, 112, 255}}, + {"lightslategray", {119, 136, 153}}, + {"lightslategrey", {119, 136, 153}}, + {"lightsteelblue", {176, 196, 222}}, + {"lightsteelblue1", {202, 225, 255}}, + {"lightsteelblue2", {188, 210, 238}}, + {"lightsteelblue3", {162, 181, 205}}, + {"lightsteelblue4", {110, 123, 139}}, + {"lightyellow", {255, 255, 224}}, + {"lightyellow1", {255, 255, 224}}, + {"lightyellow2", {238, 238, 209}}, + {"lightyellow3", {205, 205, 180}}, + {"lightyellow4", {139, 139, 122}}, + {"lime", {0, 255, 0}}, + {"limegreen", {50, 205, 50}}, + {"linen", {250, 240, 230}}, + {"magenta", {255, 0, 255}}, + {"magenta1", {255, 0, 255}}, + {"magenta2", {238, 0, 238}}, + {"magenta3", {205, 0, 205}}, + {"magenta4", {139, 0, 139}}, + {"maroon", {176, 48, 96}}, + {"maroon1", {255, 52, 179}}, + {"maroon2", {238, 48, 167}}, + {"maroon3", {205, 41, 144}}, + {"maroon4", {139, 28, 98}}, + {"mediumaquamarine", {102, 205, 170}}, + {"mediumblue", {0, 0, 205}}, + {"mediumorchid", {186, 85, 211}}, + {"mediumorchid1", {224, 102, 255}}, + {"mediumorchid2", {209, 95, 238}}, + {"mediumorchid3", {180, 82, 205}}, + {"mediumorchid4", {122, 55, 139}}, + {"mediumpurple", {147, 112, 219}}, + {"mediumpurple1", {171, 130, 255}}, + {"mediumpurple2", {159, 121, 238}}, + {"mediumpurple3", {137, 104, 205}}, + {"mediumpurple4", {93, 71, 139}}, + {"mediumseagreen", {60, 179, 113}}, + {"mediumslateblue", {123, 104, 238}}, + {"mediumspringgreen", {0, 250, 154}}, + {"mediumturquoise", {72, 209, 204}}, + {"mediumvioletred", {199, 21, 133}}, + {"midnightblue", {25, 25, 112}}, + {"mintcream", {245, 255, 250}}, + {"mistyrose", {255, 228, 225}}, + {"mistyrose1", {255, 228, 225}}, + {"mistyrose2", {238, 213, 210}}, + {"mistyrose3", {205, 183, 181}}, + {"mistyrose4", {139, 125, 123}}, + {"moccasin", {255, 228, 181}}, + {"navajowhite", {255, 222, 173}}, + {"navajowhite1", {255, 222, 173}}, + {"navajowhite2", {238, 207, 161}}, + {"navajowhite3", {205, 179, 139}}, + {"navajowhite4", {139, 121, 94}}, + 
{"navy", {0, 0, 128}}, + {"navyblue", {0, 0, 128}}, + {"oldlace", {253, 245, 230}}, + {"olive", {128, 128, 0}}, + {"olivedrab", {107, 142, 35}}, + {"olivedrab1", {192, 255, 62}}, + {"olivedrab2", {179, 238, 58}}, + {"olivedrab3", {154, 205, 50}}, + {"olivedrab4", {105, 139, 34}}, + {"orange", {255, 165, 0}}, + {"orange1", {255, 165, 0}}, + {"orange2", {238, 154, 0}}, + {"orange3", {205, 133, 0}}, + {"orange4", {139, 90, 0}}, + {"orangered", {255, 69, 0}}, + {"orangered1", {255, 69, 0}}, + {"orangered2", {238, 64, 0}}, + {"orangered3", {205, 55, 0}}, + {"orangered4", {139, 37, 0}}, + {"orchid", {218, 112, 214}}, + {"orchid1", {255, 131, 250}}, + {"orchid2", {238, 122, 233}}, + {"orchid3", {205, 105, 201}}, + {"orchid4", {139, 71, 137}}, + {"palegoldenrod", {238, 232, 170}}, + {"palegreen", {152, 251, 152}}, + {"palegreen1", {154, 255, 154}}, + {"palegreen2", {144, 238, 144}}, + {"palegreen3", {124, 205, 124}}, + {"palegreen4", {84, 139, 84}}, + {"paleturquoise", {175, 238, 238}}, + {"paleturquoise1", {187, 255, 255}}, + {"paleturquoise2", {174, 238, 238}}, + {"paleturquoise3", {150, 205, 205}}, + {"paleturquoise4", {102, 139, 139}}, + {"palevioletred", {219, 112, 147}}, + {"palevioletred1", {255, 130, 171}}, + {"palevioletred2", {238, 121, 159}}, + {"palevioletred3", {205, 104, 137}}, + {"palevioletred4", {139, 71, 93}}, + {"papayawhip", {255, 239, 213}}, + {"peachpuff", {255, 218, 185}}, + {"peachpuff1", {255, 218, 185}}, + {"peachpuff2", {238, 203, 173}}, + {"peachpuff3", {205, 175, 149}}, + {"peachpuff4", {139, 119, 101}}, + {"peru", {205, 133, 63}}, + {"pink", {255, 192, 203}}, + {"pink1", {255, 181, 197}}, + {"pink2", {238, 169, 184}}, + {"pink3", {205, 145, 158}}, + {"pink4", {139, 99, 108}}, + {"plum", {221, 160, 221}}, + {"plum1", {255, 187, 255}}, + {"plum2", {238, 174, 238}}, + {"plum3", {205, 150, 205}}, + {"plum4", {139, 102, 139}}, + {"powderblue", {176, 224, 230}}, + {"purple", {160, 32, 240}}, + {"purple1", {155, 48, 255}}, + {"purple2", {145, 44, 
238}}, + {"purple3", {125, 38, 205}}, + {"purple4", {85, 26, 139}}, + {"rebeccapurple", {102, 51, 153}}, + {"red", {255, 0, 0}}, + {"red1", {255, 0, 0}}, + {"red2", {238, 0, 0}}, + {"red3", {205, 0, 0}}, + {"red4", {139, 0, 0}}, + {"rosybrown", {188, 143, 143}}, + {"rosybrown1", {255, 193, 193}}, + {"rosybrown2", {238, 180, 180}}, + {"rosybrown3", {205, 155, 155}}, + {"rosybrown4", {139, 105, 105}}, + {"royalblue", {65, 105, 225}}, + {"royalblue1", {72, 118, 255}}, + {"royalblue2", {67, 110, 238}}, + {"royalblue3", {58, 95, 205}}, + {"royalblue4", {39, 64, 139}}, + {"saddlebrown", {139, 69, 19}}, + {"salmon", {250, 128, 114}}, + {"salmon1", {255, 140, 105}}, + {"salmon2", {238, 130, 98}}, + {"salmon3", {205, 112, 84}}, + {"salmon4", {139, 76, 57}}, + {"sandybrown", {244, 164, 96}}, + {"seagreen", {46, 139, 87}}, + {"seagreen1", {84, 255, 159}}, + {"seagreen2", {78, 238, 148}}, + {"seagreen3", {67, 205, 128}}, + {"seagreen4", {46, 139, 87}}, + {"seashell", {255, 245, 238}}, + {"seashell1", {255, 245, 238}}, + {"seashell2", {238, 229, 222}}, + {"seashell3", {205, 197, 191}}, + {"seashell4", {139, 134, 130}}, + {"sienna", {160, 82, 45}}, + {"sienna1", {255, 130, 71}}, + {"sienna2", {238, 121, 66}}, + {"sienna3", {205, 104, 57}}, + {"sienna4", {139, 71, 38}}, + {"silver", {192, 192, 192}}, + {"skyblue", {135, 206, 235}}, + {"skyblue1", {135, 206, 255}}, + {"skyblue2", {126, 192, 238}}, + {"skyblue3", {108, 166, 205}}, + {"skyblue4", {74, 112, 139}}, + {"slateblue", {106, 90, 205}}, + {"slateblue1", {131, 111, 255}}, + {"slateblue2", {122, 103, 238}}, + {"slateblue3", {105, 89, 205}}, + {"slateblue4", {71, 60, 139}}, + {"slategray", {112, 128, 144}}, + {"slategray1", {198, 226, 255}}, + {"slategray2", {185, 211, 238}}, + {"slategray3", {159, 182, 205}}, + {"slategray4", {108, 123, 139}}, + {"slategrey", {112, 128, 144}}, + {"snow", {255, 250, 250}}, + {"snow1", {255, 250, 250}}, + {"snow2", {238, 233, 233}}, + {"snow3", {205, 201, 201}}, + {"snow4", {139, 137, 137}}, + 
{"springgreen", {0, 255, 127}}, + {"springgreen1", {0, 255, 127}}, + {"springgreen2", {0, 238, 118}}, + {"springgreen3", {0, 205, 102}}, + {"springgreen4", {0, 139, 69}}, + {"steelblue", {70, 130, 180}}, + {"steelblue1", {99, 184, 255}}, + {"steelblue2", {92, 172, 238}}, + {"steelblue3", {79, 148, 205}}, + {"steelblue4", {54, 100, 139}}, + {"tan", {210, 180, 140}}, + {"tan1", {255, 165, 79}}, + {"tan2", {238, 154, 73}}, + {"tan3", {205, 133, 63}}, + {"tan4", {139, 90, 43}}, + {"teal", {0, 128, 128}}, + {"thistle", {216, 191, 216}}, + {"thistle1", {255, 225, 255}}, + {"thistle2", {238, 210, 238}}, + {"thistle3", {205, 181, 205}}, + {"thistle4", {139, 123, 139}}, + {"tomato", {255, 99, 71}}, + {"tomato1", {255, 99, 71}}, + {"tomato2", {238, 92, 66}}, + {"tomato3", {205, 79, 57}}, + {"tomato4", {139, 54, 38}}, + {"turquoise", {64, 224, 208}}, + {"turquoise1", {0, 245, 255}}, + {"turquoise2", {0, 229, 238}}, + {"turquoise3", {0, 197, 205}}, + {"turquoise4", {0, 134, 139}}, + {"violet", {238, 130, 238}}, + {"violetred", {208, 32, 144}}, + {"violetred1", {255, 62, 150}}, + {"violetred2", {238, 58, 140}}, + {"violetred3", {205, 50, 120}}, + {"violetred4", {139, 34, 82}}, + {"webgray", {128, 128, 128}}, + {"webgreen", {0, 128, 0}}, + {"webgrey", {128, 128, 128}}, + {"webmaroon", {128, 0, 0}}, + {"webpurple", {128, 0, 128}}, + {"wheat", {245, 222, 179}}, + {"wheat1", {255, 231, 186}}, + {"wheat2", {238, 216, 174}}, + {"wheat3", {205, 186, 150}}, + {"wheat4", {139, 126, 102}}, + {"white", {255, 255, 255}}, + {"whitesmoke", {245, 245, 245}}, + {"x11gray", {190, 190, 190}}, + {"x11green", {0, 255, 0}}, + {"x11grey", {190, 190, 190}}, + {"x11maroon", {176, 48, 96}}, + {"x11purple", {160, 32, 240}}, + {"yellow", {255, 255, 0}}, + {"yellow1", {255, 255, 0}}, + {"yellow2", {238, 238, 0}}, + {"yellow3", {205, 205, 0}}, + {"yellow4", {139, 139, 0}}, + {"yellowgreen", {154, 205, 50}}, + {"activeborder", {180, 180, 180}}, + {"activecaption", {153, 180, 209}}, + {"appworkspace", {171, 
171, 171}}, + {"background", {0, 0, 0}}, + {"buttonhighlight", {255, 255, 255}}, + {"buttonshadow", {160, 160, 160}}, + {"captiontext", {0, 0, 0}}, + {"inactiveborder", {244, 247, 252}}, + {"inactivecaption", {191, 205, 219}}, + {"inactivecaptiontext", {0, 0, 0}}, + {"infobackground", {255, 255, 225}}, + {"infotext", {0, 0, 0}}, + {"menu", {240, 240, 240}}, + {"menutext", {0, 0, 0}}, + {"scrollbar", {200, 200, 200}}, + {"threeddarkshadow", {0, 0, 0}}, + {"threedface", {0, 0, 0}}, + {"threedhighlight", {0, 0, 0}}, + {"threedlightshadow", {0, 0, 0}}, + {"threedshadow", {0, 0, 0}}, + {"transparent", {0, 0, 0, 0}}, + {"window", {255, 255, 255}}, + {"windowframe", {100, 100, 100}}, + {"windowtext", {0, 0, 0}}, +}; + +}// namespace rspamd::css + +#endif//RSPAMD_CSS_COLORS_LIST_HXX diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx new file mode 100644 index 0000000..aed035a --- /dev/null +++ b/src/libserver/css/css_parser.cxx @@ -0,0 +1,892 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "css_parser.hxx" +#include "css_tokeniser.hxx" +#include "css_selector.hxx" +#include "css_rule.hxx" +#include "css_util.hxx" +#include "css.hxx" +#include "fmt/core.h" + +#include <vector> +#include <unicode/utf8.h> + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +namespace rspamd::css { + +const css_consumed_block css_parser_eof_block{}; + +auto css_consumed_block::attach_block(consumed_block_ptr &&block) -> bool +{ + if (std::holds_alternative<std::monostate>(content)) { + /* Switch from monostate */ + content = std::vector<consumed_block_ptr>(); + } + else if (!std::holds_alternative<std::vector<consumed_block_ptr>>(content)) { + /* A single component, cannot attach a block ! */ + return false; + } + + auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content); + value_vec.push_back(std::move(block)); + + return true; +} + +auto css_consumed_block::add_function_argument(consumed_block_ptr &&block) -> bool +{ + if (!std::holds_alternative<css_function_block>(content)) { + return false; + } + + auto &&func_bloc = std::get<css_function_block>(content); + func_bloc.args.push_back(std::move(block)); + + return true; +} + +auto css_consumed_block::token_type_str(void) const -> const char * +{ + const auto *ret = ""; + + switch (tag) { + case parser_tag_type::css_top_block: + ret = "top"; + break; + case parser_tag_type::css_qualified_rule: + ret = "qualified rule"; + break; + case parser_tag_type::css_at_rule: + ret = "at rule"; + break; + case parser_tag_type::css_simple_block: + ret = "simple block"; + break; + case parser_tag_type::css_function: + ret = "function"; + break; + case parser_tag_type::css_function_arg: + ret = "function arg"; + break; + case parser_tag_type::css_component: + ret = "component"; + break; + case parser_tag_type::css_eof_block: + ret = "eof"; + break; + } + + return ret; +} + +auto css_consumed_block::debug_str(void) -> std::string +{ + std::string ret = fmt::format(R"("type": 
"{}", "value": )", token_type_str()); + + std::visit([&](auto &arg) { + using T = std::decay_t<decltype(arg)>; + + if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) { + /* Array of blocks */ + ret += "["; + for (const auto &block: arg) { + ret += "{"; + ret += block->debug_str(); + ret += "}, "; + } + + if (*(--ret.end()) == ' ') { + ret.pop_back(); + ret.pop_back(); /* Last ',' */ + } + ret += "]"; + } + else if constexpr (std::is_same_v<T, std::monostate>) { + /* Empty block */ + ret += R"("empty")"; + } + else if constexpr (std::is_same_v<T, css_function_block>) { + ret += R"({ "content": {"token": )"; + ret += "\"" + arg.function.debug_token_str() + "\", "; + ret += R"("arguments": [)"; + + for (const auto &block: arg.args) { + ret += "{"; + ret += block->debug_str(); + ret += "}, "; + } + if (*(--ret.end()) == ' ') { + ret.pop_back(); + ret.pop_back(); /* Last ',' */ + } + ret += "]}}"; + } + else { + /* Single element block */ + ret += "\"" + arg.debug_token_str() + "\""; + } + }, + content); + + return ret; +} + +class css_parser { +public: + css_parser(void) = delete; /* Require mempool to be set for logging */ + explicit css_parser(rspamd_mempool_t *pool) + : pool(pool) + { + style_object.reset(); + error.type = css_parse_error_type::PARSE_ERROR_NO_ERROR; + } + + /* + * This constructor captures existing via unique_ptr, but it does not + * destruct it on errors (we assume that it is owned somewhere else) + */ + explicit css_parser(std::shared_ptr<css_style_sheet> &&existing, rspamd_mempool_t *pool) + : style_object(existing), pool(pool) + { + error.type = css_parse_error_type::PARSE_ERROR_NO_ERROR; + } + + /* + * Process input css blocks + */ + std::unique_ptr<css_consumed_block> consume_css_blocks(const std::string_view &sv); + /* + * Process a single css rule + */ + std::unique_ptr<css_consumed_block> consume_css_rule(const std::string_view &sv); + std::optional<css_parse_error> consume_input(const std::string_view &sv); + + auto 
get_object_maybe(void) -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error> + { + if (style_object) { + return style_object; + } + + return tl::make_unexpected(error); + } + + /* Helper parser methods */ + static bool need_unescape(const std::string_view &sv); + +private: + std::shared_ptr<css_style_sheet> style_object; + std::unique_ptr<css_tokeniser> tokeniser; + + css_parse_error error; + rspamd_mempool_t *pool; + + int rec_level = 0; + const int max_rec = 20; + bool eof = false; + + /* Consumers */ + auto component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool; + auto function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool; + auto simple_block_consumer(std::unique_ptr<css_consumed_block> &top, + css_parser_token::token_type expected_end, + bool consume_current) -> bool; + auto qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool; + auto at_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool; +}; + +/* + * Find if we need to unescape css + */ +bool css_parser::need_unescape(const std::string_view &sv) +{ + bool in_quote = false; + char quote_char, prev_c = 0; + + for (const auto c: sv) { + if (!in_quote) { + if (c == '"' || c == '\'') { + in_quote = true; + quote_char = c; + } + else if (c == '\\') { + return true; + } + } + else { + if (c == quote_char) { + if (prev_c != '\\') { + in_quote = false; + } + } + prev_c = c; + } + } + + return false; +} + +auto css_parser::function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool +{ + auto ret = true, want_more = true; + + msg_debug_css("consume function block; top block: %s, recursion level %d", + top->token_type_str(), rec_level); + + if (++rec_level > max_rec) { + msg_err_css("max nesting reached, ignore style"); + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING, + "maximum nesting has reached when parsing function value"); + return false; + } + + while (ret && want_more && !eof) { + auto next_token = 
tokeniser->next_token(); + + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + case css_parser_token::token_type::ebrace_token: + ret = true; + want_more = false; + break; + case css_parser_token::token_type::comma_token: + case css_parser_token::token_type::delim_token: + case css_parser_token::token_type::obrace_token: + break; + default: + /* Attach everything to the function block */ + top->add_function_argument(std::make_unique<css_consumed_block>( + css::css_consumed_block::parser_tag_type::css_function_arg, + std::move(next_token))); + break; + } + } + + --rec_level; + + return ret; +} + +auto css_parser::simple_block_consumer(std::unique_ptr<css_consumed_block> &top, + css_parser_token::token_type expected_end, + bool consume_current) -> bool +{ + auto ret = true; + std::unique_ptr<css_consumed_block> block; + + msg_debug_css("consume simple block; top block: %s, recursion level %d", + top->token_type_str(), rec_level); + + if (!consume_current && ++rec_level > max_rec) { + msg_err_css("max nesting reached, ignore style"); + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING, + "maximum nesting has reached when parsing simple block value"); + return false; + } + + if (!consume_current) { + block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_simple_block); + } + + + while (ret && !eof) { + auto next_token = tokeniser->next_token(); + + if (next_token.type == expected_end) { + break; + } + + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + default: + tokeniser->pushback_token(next_token); + ret = component_value_consumer(consume_current ? 
top : block); + break; + } + } + + if (!consume_current && ret) { + msg_debug_css("attached node 'simple block' rule %s; length=%d", + block->token_type_str(), (int) block->size()); + top->attach_block(std::move(block)); + } + + if (!consume_current) { + --rec_level; + } + + return ret; +} + +auto css_parser::qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool +{ + msg_debug_css("consume qualified block; top block: %s, recursion level %d", + top->token_type_str(), rec_level); + + if (++rec_level > max_rec) { + msg_err_css("max nesting reached, ignore style"); + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING, + "maximum nesting has reached when parsing qualified rule value"); + return false; + } + + auto ret = true, want_more = true; + auto block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_qualified_rule); + + while (ret && want_more && !eof) { + auto next_token = tokeniser->next_token(); + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::cdo_token: + case css_parser_token::token_type::cdc_token: + if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { + /* Ignore */ + ret = true; + } + else { + } + break; + case css_parser_token::token_type::ocurlbrace_token: + ret = simple_block_consumer(block, + css_parser_token::token_type::ecurlbrace_token, false); + want_more = false; + break; + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + default: + tokeniser->pushback_token(next_token); + ret = component_value_consumer(block); + break; + }; + } + + if (ret) { + if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { + msg_debug_css("attached node qualified rule %s; length=%d", + block->token_type_str(), (int) block->size()); + top->attach_block(std::move(block)); + } + } + + --rec_level; + + return ret; +} + +auto 
css_parser::at_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool +{ + msg_debug_css("consume at-rule block; top block: %s, recursion level %d", + top->token_type_str(), rec_level); + + if (++rec_level > max_rec) { + msg_err_css("max nesting reached, ignore style"); + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING, + "maximum nesting has reached when parsing at keyword"); + return false; + } + + auto ret = true, want_more = true; + auto block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_at_rule); + + while (ret && want_more && !eof) { + auto next_token = tokeniser->next_token(); + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::cdo_token: + case css_parser_token::token_type::cdc_token: + if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { + /* Ignore */ + ret = true; + } + else { + } + break; + case css_parser_token::token_type::ocurlbrace_token: + ret = simple_block_consumer(block, + css_parser_token::token_type::ecurlbrace_token, false); + want_more = false; + break; + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + case css_parser_token::token_type::semicolon_token: + want_more = false; + break; + default: + tokeniser->pushback_token(next_token); + ret = component_value_consumer(block); + break; + }; + } + + if (ret) { + if (top->tag == css_consumed_block::parser_tag_type::css_top_block) { + msg_debug_css("attached node qualified rule %s; length=%d", + block->token_type_str(), (int) block->size()); + top->attach_block(std::move(block)); + } + } + + --rec_level; + + return ret; +} + +auto css_parser::component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool +{ + auto ret = true, need_more = true; + std::unique_ptr<css_consumed_block> block; + + msg_debug_css("consume component block; top block: %s, recursion level %d", + 
top->token_type_str(), rec_level); + + if (++rec_level > max_rec) { + error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING, + "maximum nesting has reached when parsing component value"); + return false; + } + + while (ret && need_more && !eof) { + auto next_token = tokeniser->next_token(); + + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::ocurlbrace_token: + block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_simple_block); + ret = simple_block_consumer(block, + css_parser_token::token_type::ecurlbrace_token, + true); + need_more = false; + break; + case css_parser_token::token_type::obrace_token: + block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_simple_block); + ret = simple_block_consumer(block, + css_parser_token::token_type::ebrace_token, + true); + need_more = false; + break; + case css_parser_token::token_type::osqbrace_token: + block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_simple_block); + ret = simple_block_consumer(block, + css_parser_token::token_type::esqbrace_token, + true); + need_more = false; + break; + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + case css_parser_token::token_type::function_token: { + need_more = false; + block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_function, + std::move(next_token)); + + /* Consume the rest */ + ret = function_consumer(block); + break; + } + default: + block = std::make_unique<css_consumed_block>( + css_consumed_block::parser_tag_type::css_component, + std::move(next_token)); + need_more = false; + break; + } + } + + if (ret && block) { + msg_debug_css("attached node component rule %s; length=%d", + block->token_type_str(), (int) block->size()); + top->attach_block(std::move(block)); + } + + --rec_level; 
+ + return ret; +} + +auto css_parser::consume_css_blocks(const std::string_view &sv) -> std::unique_ptr<css_consumed_block> +{ + tokeniser = std::make_unique<css_tokeniser>(pool, sv); + auto ret = true; + + auto consumed_blocks = + std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_top_block); + + while (!eof && ret) { + auto next_token = tokeniser->next_token(); + + switch (next_token.type) { + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::at_keyword_token: + tokeniser->pushback_token(next_token); + ret = at_rule_consumer(consumed_blocks); + break; + default: + tokeniser->pushback_token(next_token); + ret = qualified_rule_consumer(consumed_blocks); + break; + } + } + + tokeniser.reset(nullptr); /* No longer needed */ + + return consumed_blocks; +} + +auto css_parser::consume_css_rule(const std::string_view &sv) -> std::unique_ptr<css_consumed_block> +{ + tokeniser = std::make_unique<css_tokeniser>(pool, sv); + auto ret = true; + + auto rule_block = + std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_simple_block); + + while (!eof && ret) { + auto next_token = tokeniser->next_token(); + + switch (next_token.type) { + case css_parser_token::token_type::eof_token: + eof = true; + break; + case css_parser_token::token_type::whitespace_token: + /* Ignore whitespaces */ + break; + default: + tokeniser->pushback_token(next_token); + ret = component_value_consumer(rule_block); + break; + } + } + + tokeniser.reset(nullptr); /* No longer needed */ + + return rule_block; +} + +std::optional<css_parse_error> +css_parser::consume_input(const std::string_view &sv) +{ + auto &&consumed_blocks = consume_css_blocks(sv); + const auto &rules = consumed_blocks->get_blocks_or_empty(); + + if (rules.empty()) { + if (error.type == css_parse_error_type::PARSE_ERROR_NO_ERROR) { + 
return css_parse_error(css_parse_error_type::PARSE_ERROR_EMPTY, + "no css rules consumed"); + } + else { + return error; + } + } + + if (!style_object) { + style_object = std::make_shared<css_style_sheet>(pool); + } + + for (auto &&rule: rules) { + /* + * For now, we do not need any of the at rules, so we can safely ignore them + */ + auto &&children = rule->get_blocks_or_empty(); + + if (children.size() > 1 && + children[0]->tag == css_consumed_block::parser_tag_type::css_component) { + auto simple_block = std::find_if(children.begin(), children.end(), + [](auto &bl) { + return bl->tag == css_consumed_block::parser_tag_type::css_simple_block; + }); + + if (simple_block != children.end()) { + /* + * We have a component and a simple block, + * so we can parse a selector and then extract + * declarations from a simple block + */ + + /* First, tag all components as preamble */ + auto selector_it = children.cbegin(); + + auto selector_token_functor = [&selector_it, &simple_block](void) + -> const css_consumed_block & { + for (;;) { + if (selector_it == simple_block) { + return css_parser_eof_block; + } + + const auto &ret = (*selector_it); + + ++selector_it; + + return *ret; + } + }; + + auto selectors_vec = process_selector_tokens(pool, selector_token_functor); + + if (selectors_vec.size() > 0) { + msg_debug_css("processed %d selectors", (int) selectors_vec.size()); + auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin(); + auto decls_end = (*simple_block)->get_blocks_or_empty().cend(); + auto declaration_token_functor = [&decls_it, &decls_end](void) + -> const css_consumed_block & { + for (;;) { + if (decls_it == decls_end) { + return css_parser_eof_block; + } + + const auto &ret = (*decls_it); + + ++decls_it; + + return *ret; + } + }; + + auto declarations_vec = process_declaration_tokens(pool, + declaration_token_functor); + + if (declarations_vec && !declarations_vec->get_rules().empty()) { + msg_debug_css("processed %d rules", + (int) 
declarations_vec->get_rules().size()); + + for (auto &&selector: selectors_vec) { + style_object->add_selector_rule(std::move(selector), + declarations_vec); + } + } + } + } + } + } + + auto debug_str = consumed_blocks->debug_str(); + msg_debug_css("consumed css: {%*s}", (int) debug_str.size(), debug_str.data()); + + return std::nullopt; +} + +auto get_selectors_parser_functor(rspamd_mempool_t *pool, + const std::string_view &st) -> blocks_gen_functor +{ + css_parser parser(pool); + + auto &&consumed_blocks = parser.consume_css_blocks(st); + const auto &rules = consumed_blocks->get_blocks_or_empty(); + + auto rules_it = rules.begin(); + auto &&children = (*rules_it)->get_blocks_or_empty(); + auto cur = children.begin(); + auto last = children.end(); + + /* + * We use move only wrapper to state the fact that the cosumed blocks + * are moved into the closure, not copied. + * It prevents us from thinking about copies of the blocks and + * functors. + * Mutable lambda is required to copy iterators inside of the closure, + * as, again, it is C++ where lifetime of the objects must be explicitly + * transferred. On the other hand, we could move all stuff inside and remove + * mutable. 
+ */ + return [cur, consumed_blocks = std::move(consumed_blocks), last](void) mutable + -> const css_consumed_block & { + if (cur != last) { + const auto &ret = (*cur); + + ++cur; + + return *ret; + } + + return css_parser_eof_block; + }; +} + +auto get_rules_parser_functor(rspamd_mempool_t *pool, + const std::string_view &st) -> blocks_gen_functor +{ + css_parser parser(pool); + + auto &&consumed_blocks = parser.consume_css_rule(st); + const auto &rules = consumed_blocks->get_blocks_or_empty(); + + auto cur = rules.begin(); + auto last = rules.end(); + + return [cur, consumed_blocks = std::move(consumed_blocks), last](void) mutable + -> const css_consumed_block & { + if (cur != last) { + const auto &ret = (*cur); + + ++cur; + + return *ret; + } + + return css_parser_eof_block; + }; +} + + +/* + * Wrapper for the parser + */ +auto parse_css(rspamd_mempool_t *pool, const std::string_view &st, + std::shared_ptr<css_style_sheet> &&other) + -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error> +{ + css_parser parser(std::forward<std::shared_ptr<css_style_sheet>>(other), pool); + std::string_view processed_input; + + if (css_parser::need_unescape(st)) { + processed_input = rspamd::css::unescape_css(pool, st); + } + else { + /* Lowercase inplace */ + auto *nspace = rspamd_mempool_alloc_buffer(pool, st.size()); + rspamd_str_copy_lc(st.data(), nspace, st.size()); + processed_input = std::string_view{nspace, st.size()}; + } + + auto maybe_error = parser.consume_input(processed_input); + if (!maybe_error) { + return parser.get_object_maybe(); + } + + return tl::make_unexpected(maybe_error.value()); +} + +auto parse_css_declaration(rspamd_mempool_t *pool, const std::string_view &st) + -> rspamd::html::html_block * +{ + std::string_view processed_input; + + if (css_parser::need_unescape(st)) { + processed_input = rspamd::css::unescape_css(pool, st); + } + else { + auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, st.size())); + auto nlen = 
rspamd_str_copy_lc(st.data(), nspace, st.size()); + processed_input = std::string_view{nspace, nlen}; + } + auto &&res = process_declaration_tokens(pool, + get_rules_parser_functor(pool, processed_input)); + + if (res) { + return res->compile_to_block(pool); + } + + return nullptr; +} + +TEST_SUITE("css") +{ + TEST_CASE("parse colors") + { + const std::vector<const char *> cases{ + "P { CoLoR: rgb(100%, 50%, 0%); opacity: -1; width: 1em; display: none; } /* very transparent solid orange тест */", + "p { color: rgb(100%, 50%, 0%); opacity: 2; display: inline; } /* very transparent solid orange */", + "p { color: rgb(100%, 50%, 0%); opacity: 0.5; } /* very transparent solid orange */\n", + "p { color: rgb(100%, 50%, 0%); opacity: 1; width: 99%; } /* very transparent solid orange */\n", + "p { color: rgb(100%, 50%, 0%); opacity: 10%; width: 99%; } /* very transparent solid orange */\n", + "p { color: rgb(100%, 50%, 0%); opacity: 10%; width: 100px; } /* very transparent solid orange */\n", + "p { color: rgb(100%, 50%, 0%); opacity: 10% } /* very transparent solid orange */\n", + "* { color: hsl(0, 100%, 50%) !important } /* red */\n", + "* { color: hsl(120, 100%, 50%) important } /* lime */\n", + "* { color: hsl(120, 100%, 25%) } /* dark green */\n", + "* { color: hsl(120, 100%, 75%) } /* light green */\n", + "* { color: hsl(120, 75%, 75%) } /* pastel green, and so on */\n", + "em { color: #f00 } /* #rgb */\n", + "em { color: #ff0000 } /* #rrggbb */\n", + "em { color: rgb(255,0,0) }\n", + "em { color: rgb(100%, 0%, 0%) }\n", + "body {color: black; background: white }\n", + "h1 { color: maroon }\n", + "h2 { color: olive }\n", + "em { color: rgb(255,0,0) } /* integer range 0 - 255 */\n", + "em { color: rgb(300,0,0) } /* clipped to rgb(255,0,0) */\n", + "em { color: rgb(255,-10,0) } /* clipped to rgb(255,0,0) */\n", + "em { color: rgb(110%, 0%, 0%) } /* clipped to rgb(100%,0%,0%) */\n", + "em { color: rgb(255,0,0) } /* integer range 0 - 255 */\n", + "em { color: 
rgba(255,0,0,1) /* the same, with explicit opacity of 1 */\n", + "em { color: rgb(100%,0%,0%) } /* float range 0.0% - 100.0% */\n", + "em { color: rgba(100%,0%,0%,1) } /* the same, with explicit opacity of 1 */\n", + "p { color: rgba(0,0,255,0.5) } /* semi-transparent solid blue */\n", + "p { color: rgba(100%, 50%, 0%, 0.1) } /* very transparent solid orange */", + ".chat-icon[_ng-cnj-c0]::before{content:url(group-2.63e87cd21fbf8c966dd.svg);width:60px;height:60px;display:block}", + "tt{color:#1e3482}", + "tt{unicode-range: u+0049-u+004a,u+0020;}", + "@import url(https://fonts.googleapis.com/css?family=arial:300,400,7000;", + "tt{color:black;\v}", + "tt{color:black;\f}", + }; + + rspamd_mempool_t *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "css", 0); + for (const auto &c: cases) { + SUBCASE((std::string("parse css: ") + c).c_str()) + { + CHECK(parse_css(pool, c, nullptr).value().get() != nullptr); + } + } + + /* We now merge all styles together */ + SUBCASE("merged css parse") + { + std::shared_ptr<css_style_sheet> merged; + for (const auto &c: cases) { + auto ret = parse_css(pool, c, std::move(merged)); + merged.swap(ret.value()); + } + + CHECK(merged.get() != nullptr); + } + + rspamd_mempool_delete(pool); + } +} +}// namespace rspamd::css diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx new file mode 100644 index 0000000..d5a9671 --- /dev/null +++ b/src/libserver/css/css_parser.hxx @@ -0,0 +1,244 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_CSS_PARSER_HXX +#define RSPAMD_CSS_PARSER_HXX + +#include <variant> +#include <vector> +#include <memory> +#include <string> + +#include "function2/function2.hpp" +#include "css_tokeniser.hxx" +#include "parse_error.hxx" +#include "contrib/expected/expected.hpp" +#include "logger.h" + +/* Forward declaration */ +namespace rspamd::html { +struct html_block; +} + +namespace rspamd::css { + +/* + * Represents a consumed token by a parser + */ +class css_consumed_block { +public: + enum class parser_tag_type : std::uint8_t { + css_top_block = 0, + css_qualified_rule, + css_at_rule, + css_simple_block, + css_function, + css_function_arg, + css_component, + css_eof_block, + }; + using consumed_block_ptr = std::unique_ptr<css_consumed_block>; + + struct css_function_block { + css_parser_token function; + std::vector<consumed_block_ptr> args; + + css_function_block(css_parser_token &&tok) + : function(std::forward<css_parser_token>(tok)) + { + } + + auto as_string() const -> std::string_view + { + return function.get_string_or_default(""); + } + + static auto empty_function() -> const css_function_block & + { + static const css_function_block invalid( + css_parser_token(css_parser_token::token_type::eof_token, + css_parser_token_placeholder())); + return invalid; + } + }; + + css_consumed_block() + : tag(parser_tag_type::css_eof_block) + { + } + css_consumed_block(parser_tag_type tag) + : tag(tag) + { + if (tag == parser_tag_type::css_top_block || + tag == parser_tag_type::css_qualified_rule || + tag == parser_tag_type::css_simple_block) { + /* Pre-allocate content for known vector blocks */ + std::vector<consumed_block_ptr> vec; + vec.reserve(4); + content = std::move(vec); + } + } + /* Construct a block from a single lexer token (for trivial blocks) */ + explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) + 
: tag(tag) + { + if (tag == parser_tag_type::css_function) { + content = css_function_block{std::move(tok)}; + } + else { + content = std::move(tok); + } + } + + /* Attach a new block to the compound block, consuming block inside */ + auto attach_block(consumed_block_ptr &&block) -> bool; + /* Attach a new argument to the compound function block, consuming block inside */ + auto add_function_argument(consumed_block_ptr &&block) -> bool; + + auto assign_token(css_parser_token &&tok) -> void + { + content = std::move(tok); + } + + /* Empty blocks used to avoid type checks in loops */ + const inline static std::vector<consumed_block_ptr> empty_block_vec{}; + + auto is_blocks_vec() const -> bool + { + return (std::holds_alternative<std::vector<consumed_block_ptr>>(content)); + } + + auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr> & + { + if (is_blocks_vec()) { + return std::get<std::vector<consumed_block_ptr>>(content); + } + + return empty_block_vec; + } + + auto is_token() const -> bool + { + return (std::holds_alternative<css_parser_token>(content)); + } + + auto get_token_or_empty() const -> const css_parser_token & + { + if (is_token()) { + return std::get<css_parser_token>(content); + } + + return css_parser_eof_token(); + } + + auto is_function() const -> bool + { + return (std::holds_alternative<css_function_block>(content)); + } + + auto get_function_or_invalid() const -> const css_function_block & + { + if (is_function()) { + return std::get<css_function_block>(content); + } + + return css_function_block::empty_function(); + } + + auto size() const -> std::size_t + { + auto ret = 0; + + std::visit([&](auto &arg) { + using T = std::decay_t<decltype(arg)>; + + if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) { + /* Array of blocks */ + ret = arg.size(); + } + else if constexpr (std::is_same_v<T, std::monostate>) { + /* Empty block */ + ret = 0; + } + else { + /* Single element block */ + ret = 1; + } + }, + content); + 
+ return ret; + } + + auto is_eof() -> bool + { + return tag == parser_tag_type::css_eof_block; + } + + /* Debug methods */ + auto token_type_str(void) const -> const char *; + auto debug_str(void) -> std::string; + +public: + parser_tag_type tag; + +private: + std::variant<std::monostate, + std::vector<consumed_block_ptr>, + css_parser_token, + css_function_block> + content; +}; + +extern const css_consumed_block css_parser_eof_block; + +using blocks_gen_functor = fu2::unique_function<const css_consumed_block &(void)>; + +class css_style_sheet; +/* + * Update the existing stylesheet with another stylesheet + */ +auto parse_css(rspamd_mempool_t *pool, const std::string_view &st, + std::shared_ptr<css_style_sheet> &&other) + -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>; + +/* + * Creates a functor to consume css selectors sequence + */ +auto get_selectors_parser_functor(rspamd_mempool_t *pool, + const std::string_view &st) -> blocks_gen_functor; + +/* + * Creates a functor to process a rule definition (e.g. from embedded style tag for + * an element) + */ +auto get_rules_parser_functor(rspamd_mempool_t *pool, + const std::string_view &st) -> blocks_gen_functor; + +/** + * Parses a css declaration (e.g. embedded css and returns a completed html block) + * @param pool + * @param st + * @return + */ +auto parse_css_declaration(rspamd_mempool_t *pool, const std::string_view &st) + -> rspamd::html::html_block *; + +}// namespace rspamd::css + +#endif//RSPAMD_CSS_PARSER_HXX diff --git a/src/libserver/css/css_property.cxx b/src/libserver/css/css_property.cxx new file mode 100644 index 0000000..1557109 --- /dev/null +++ b/src/libserver/css/css_property.cxx @@ -0,0 +1,69 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css_property.hxx" +#include "frozen/unordered_map.h" +#include "frozen/string.h" +#include "libutil/cxx/util.hxx" + +namespace rspamd::css { + +constexpr const auto prop_names_map = frozen::make_unordered_map<frozen::string, css_property_type>({ + {"font", css_property_type::PROPERTY_FONT}, + {"font-color", css_property_type::PROPERTY_FONT_COLOR}, + {"font-size", css_property_type::PROPERTY_FONT_SIZE}, + {"color", css_property_type::PROPERTY_COLOR}, + {"bgcolor", css_property_type::PROPERTY_BGCOLOR}, + {"background-color", css_property_type::PROPERTY_BGCOLOR}, + {"background", css_property_type::PROPERTY_BACKGROUND}, + {"height", css_property_type::PROPERTY_HEIGHT}, + {"width", css_property_type::PROPERTY_WIDTH}, + {"display", css_property_type::PROPERTY_DISPLAY}, + {"visibility", css_property_type::PROPERTY_VISIBILITY}, + {"opacity", css_property_type::PROPERTY_OPACITY}, +}); + +/* Ensure that we have all cases listed */ +static_assert(prop_names_map.size() >= static_cast<int>(css_property_type::PROPERTY_NYI)); + +auto token_string_to_property(const std::string_view &inp) + -> css_property_type +{ + + css_property_type ret = css_property_type::PROPERTY_NYI; + + auto known_type = find_map(prop_names_map, inp); + + if (known_type) { + ret = known_type.value().get(); + } + + return ret; +} + +auto css_property::from_token(const css_parser_token &tok) + -> tl::expected<css_property, css_parse_error> +{ + if (tok.type == css_parser_token::token_type::ident_token) { + auto sv = tok.get_string_or_default(""); + + return 
css_property{token_string_to_property(sv), css_property_flag::FLAG_NORMAL}; + } + + return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)}; +} + +}// namespace rspamd::css diff --git a/src/libserver/css/css_property.hxx b/src/libserver/css/css_property.hxx new file mode 100644 index 0000000..9661222 --- /dev/null +++ b/src/libserver/css/css_property.hxx @@ -0,0 +1,172 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#ifndef RSPAMD_CSS_PROPERTY_HXX +#define RSPAMD_CSS_PROPERTY_HXX + +#include <string> +#include "css_tokeniser.hxx" +#include "parse_error.hxx" +#include "contrib/expected/expected.hpp" + +namespace rspamd::css { + +/* + * To be extended with properties that are interesting from the email + * point of view + */ +enum class css_property_type : std::uint16_t { + PROPERTY_FONT = 0, + PROPERTY_FONT_COLOR, + PROPERTY_FONT_SIZE, + PROPERTY_COLOR, + PROPERTY_BGCOLOR, + PROPERTY_BACKGROUND, + PROPERTY_HEIGHT, + PROPERTY_WIDTH, + PROPERTY_DISPLAY, + PROPERTY_VISIBILITY, + PROPERTY_OPACITY, + PROPERTY_NYI, +}; + +enum class css_property_flag : std::uint16_t { + FLAG_NORMAL, + FLAG_IMPORTANT, + FLAG_NOT_IMPORTANT +}; + +struct alignas(int) css_property { + css_property_type type; + css_property_flag flag; + + css_property(css_property_type t, css_property_flag fl = css_property_flag::FLAG_NORMAL) + : type(t), flag(fl) + { + } + static tl::expected<css_property, css_parse_error> from_token( + const css_parser_token &tok); + + constexpr auto to_string(void) const -> const char * + { + const char *ret = "nyi"; + + switch (type) { + case css_property_type::PROPERTY_FONT: + ret = "font"; + break; + case css_property_type::PROPERTY_FONT_COLOR: + ret = "font-color"; + break; + case css_property_type::PROPERTY_FONT_SIZE: + ret = "font-size"; + break; + case css_property_type::PROPERTY_COLOR: + ret = "color"; + break; + case css_property_type::PROPERTY_BGCOLOR: + ret = "bgcolor"; + break; + case css_property_type::PROPERTY_BACKGROUND: + ret = "background"; + break; + case css_property_type::PROPERTY_HEIGHT: + ret = "height"; + break; + case css_property_type::PROPERTY_WIDTH: + ret = "width"; + break; + case css_property_type::PROPERTY_DISPLAY: + ret = "display"; + break; + case css_property_type::PROPERTY_VISIBILITY: + ret = "visibility"; + break; + case css_property_type::PROPERTY_OPACITY: + ret = "opacity"; + break; + default: + break; + } + + return ret; + } + 
+ /* Helpers to define which values are valid for which properties */ + auto is_color(void) const -> bool + { + return type == css_property_type::PROPERTY_COLOR || + type == css_property_type::PROPERTY_BACKGROUND || + type == css_property_type::PROPERTY_BGCOLOR || + type == css_property_type::PROPERTY_FONT_COLOR || + type == css_property_type::PROPERTY_FONT; + } + auto is_dimension(void) const -> bool + { + return type == css_property_type::PROPERTY_HEIGHT || + type == css_property_type::PROPERTY_WIDTH || + type == css_property_type::PROPERTY_FONT_SIZE || + type == css_property_type::PROPERTY_FONT; + } + + auto is_normal_number(void) const -> bool + { + return type == css_property_type::PROPERTY_OPACITY; + } + + auto is_display(void) const -> bool + { + return type == css_property_type::PROPERTY_DISPLAY; + } + + auto is_visibility(void) const -> bool + { + return type == css_property_type::PROPERTY_VISIBILITY; + } + + auto operator==(const css_property &other) const + { + return type == other.type; + } +}; + + +}// namespace rspamd::css + +/* Make properties hashable */ +namespace std { +template<> +class hash<rspamd::css::css_property> { +public: + using is_avalanching = void; + /* Mix bits to provide slightly better distribution but being constexpr */ + constexpr size_t operator()(const rspamd::css::css_property &prop) const + { + std::size_t key = 0xdeadbeef ^ static_cast<std::size_t>(prop.type); + key = (~key) + (key << 21); + key = key ^ (key >> 24); + key = (key + (key << 3)) + (key << 8); + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4); + key = key ^ (key >> 28); + key = key + (key << 31); + return key; + } +}; +}// namespace std + +#endif//RSPAMD_CSS_PROPERTY_HXX
\ No newline at end of file diff --git a/src/libserver/css/css_rule.cxx b/src/libserver/css/css_rule.cxx new file mode 100644 index 0000000..4e33ac7 --- /dev/null +++ b/src/libserver/css/css_rule.cxx @@ -0,0 +1,531 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css_rule.hxx" +#include "css.hxx" +#include "libserver/html/html_block.hxx" +#include <limits> + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +namespace rspamd::css { + +/* Class methods */ +void css_rule::override_values(const css_rule &other) +{ + int bits = 0; + /* Ensure that our bitset is large enough */ + static_assert(1 << std::variant_size_v<decltype(css_value::value)> < + std::numeric_limits<int>::max()); + + for (const auto &v: values) { + bits |= static_cast<int>(1 << v.value.index()); + } + + for (const auto &ov: other.values) { + if (isset(&bits, static_cast<int>(1 << ov.value.index()))) { + /* We need to override the existing value */ + /* + * The algorithm is not very efficient, + * so we need to sort the values first and have a O(N) algorithm + * On the other hand, values vectors are usually limited to the + * number of elements about less then 10, so this O(N^2) algorithm + * is probably ok here + */ + for (auto &v: values) { + if (v.value.index() == ov.value.index()) { + v = ov; + } + } + } + } + + /* Copy only not set values */ + std::copy_if(other.values.begin(), other.values.end(), 
std::back_inserter(values), + [&bits](const auto &elt) -> bool { + return (bits & (1 << static_cast<int>(elt.value.index()))) == 0; + }); +} + +void css_rule::merge_values(const css_rule &other) +{ + unsigned int bits = 0; + + for (const auto &v: values) { + bits |= 1 << v.value.index(); + } + + /* Copy only not set values */ + std::copy_if(other.values.begin(), other.values.end(), std::back_inserter(values), + [&bits](const auto &elt) -> bool { + return (bits & (1 << elt.value.index())) == 0; + }); +} + +auto css_declarations_block::add_rule(rule_shared_ptr rule) -> bool +{ + auto it = rules.find(rule); + auto &&remote_prop = rule->get_prop(); + auto ret = true; + + if (rule->get_values().size() == 0) { + /* Ignore rules with no values */ + return false; + } + + if (it != rules.end()) { + auto &&local_rule = *it; + auto &&local_prop = local_rule->get_prop(); + + if (local_prop.flag == css_property_flag::FLAG_IMPORTANT) { + if (remote_prop.flag == css_property_flag::FLAG_IMPORTANT) { + local_rule->override_values(*rule); + } + else { + /* Override remote not important over local important */ + local_rule->merge_values(*rule); + } + } + else if (local_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) { + if (remote_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) { + local_rule->override_values(*rule); + } + else { + /* Override local not important over important */ + local_rule->merge_values(*rule); + } + } + else { + if (remote_prop.flag == css_property_flag::FLAG_IMPORTANT) { + /* Override with remote */ + local_rule->override_values(*rule); + } + else if (remote_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) { + /* Ignore remote not important over local normal */ + ret = false; + } + else { + /* Merge both */ + local_rule->merge_values(*rule); + } + } + } + else { + rules.insert(std::move(rule)); + } + + return ret; +} + +}// namespace rspamd::css + +namespace rspamd::css { + +/* Static functions */ + +static auto +allowed_property_value(const 
css_property &prop, const css_consumed_block &parser_block) + -> std::optional<css_value> +{ + if (prop.is_color()) { + if (parser_block.is_token()) { + /* A single token */ + const auto &tok = parser_block.get_token_or_empty(); + + if (tok.type == css_parser_token::token_type::hash_token) { + return css_value::maybe_color_from_hex(tok.get_string_or_default("")); + } + else if (tok.type == css_parser_token::token_type::ident_token) { + auto &&ret = css_value::maybe_color_from_string(tok.get_string_or_default("")); + + return ret; + } + } + else if (parser_block.is_function()) { + const auto &func = parser_block.get_function_or_invalid(); + + auto &&ret = css_value::maybe_color_from_function(func); + return ret; + } + } + if (prop.is_dimension()) { + if (parser_block.is_token()) { + /* A single token */ + const auto &tok = parser_block.get_token_or_empty(); + + if (tok.type == css_parser_token::token_type::number_token) { + return css_value::maybe_dimension_from_number(tok); + } + } + } + if (prop.is_display()) { + if (parser_block.is_token()) { + /* A single token */ + const auto &tok = parser_block.get_token_or_empty(); + + if (tok.type == css_parser_token::token_type::ident_token) { + return css_value::maybe_display_from_string(tok.get_string_or_default("")); + } + } + } + if (prop.is_visibility()) { + if (parser_block.is_token()) { + /* A single token */ + const auto &tok = parser_block.get_token_or_empty(); + + if (tok.type == css_parser_token::token_type::ident_token) { + return css_value::maybe_display_from_string(tok.get_string_or_default("")); + } + } + } + if (prop.is_normal_number()) { + if (parser_block.is_token()) { + /* A single token */ + const auto &tok = parser_block.get_token_or_empty(); + + if (tok.type == css_parser_token::token_type::number_token) { + return css_value{tok.get_normal_number_or_default(0)}; + } + } + } + + return std::nullopt; +} + +auto process_declaration_tokens(rspamd_mempool_t *pool, + blocks_gen_functor &&next_block_functor) 
+ -> css_declarations_block_ptr +{ + css_declarations_block_ptr ret; + bool can_continue = true; + css_property cur_property{css_property_type::PROPERTY_NYI, + css_property_flag::FLAG_NORMAL}; + static const css_property bad_property{css_property_type::PROPERTY_NYI, + css_property_flag::FLAG_NORMAL}; + std::shared_ptr<css_rule> cur_rule; + + enum { + parse_property, + parse_value, + ignore_value, /* For unknown properties */ + } state = parse_property; + + auto seen_not = false; + ret = std::make_shared<css_declarations_block>(); + + while (can_continue) { + const auto &next_tok = next_block_functor(); + + switch (next_tok.tag) { + case css_consumed_block::parser_tag_type::css_component: + /* Component can be a property or a compound list of values */ + if (state == parse_property) { + cur_property = css_property::from_token(next_tok.get_token_or_empty()) + .value_or(bad_property); + + if (cur_property.type == css_property_type::PROPERTY_NYI) { + state = ignore_value; + /* Ignore everything till ; */ + continue; + } + + msg_debug_css("got css property: %s", cur_property.to_string()); + + /* We now expect colon block */ + const auto &expect_colon_block = next_block_functor(); + + if (expect_colon_block.tag != css_consumed_block::parser_tag_type::css_component) { + state = ignore_value; /* Ignore up to the next rule */ + } + else { + const auto &expect_colon_tok = expect_colon_block.get_token_or_empty(); + + if (expect_colon_tok.type != css_parser_token::token_type::colon_token) { + msg_debug_css("invalid rule, no colon after property"); + state = ignore_value; /* Ignore up to the next rule */ + } + else { + state = parse_value; + cur_rule = std::make_shared<css_rule>(cur_property); + } + } + } + else if (state == parse_value) { + /* Check semicolon */ + if (next_tok.is_token()) { + const auto &parser_tok = next_tok.get_token_or_empty(); + + if (parser_tok.type == css_parser_token::token_type::semicolon_token && cur_rule) { + ret->add_rule(std::move(cur_rule)); + 
state = parse_property; + seen_not = false; + continue; + } + else if (parser_tok.type == css_parser_token::token_type::delim_token) { + if (parser_tok.get_string_or_default("") == "!") { + /* Probably something like !important */ + seen_not = true; + } + } + else if (parser_tok.type == css_parser_token::token_type::ident_token) { + if (parser_tok.get_string_or_default("") == "important") { + if (seen_not) { + msg_debug_css("add !important flag to property %s", + cur_property.to_string()); + cur_property.flag = css_property_flag::FLAG_NOT_IMPORTANT; + } + else { + msg_debug_css("add important flag to property %s", + cur_property.to_string()); + cur_property.flag = css_property_flag::FLAG_IMPORTANT; + } + + seen_not = false; + + continue; + } + else { + seen_not = false; + } + } + } + + auto maybe_value = allowed_property_value(cur_property, next_tok); + + if (maybe_value) { + msg_debug_css("added value %s to the property %s", + maybe_value.value().debug_str().c_str(), + cur_property.to_string()); + cur_rule->add_value(maybe_value.value()); + } + } + else { + /* Ignore all till ; */ + if (next_tok.is_token()) { + const auto &parser_tok = next_tok.get_token_or_empty(); + + if (parser_tok.type == css_parser_token::token_type::semicolon_token) { + state = parse_property; + } + } + } + break; + case css_consumed_block::parser_tag_type::css_function: + if (state == parse_value) { + auto maybe_value = allowed_property_value(cur_property, next_tok); + + if (maybe_value && cur_rule) { + msg_debug_css("added value %s to the property %s", + maybe_value.value().debug_str().c_str(), + cur_property.to_string()); + cur_rule->add_value(maybe_value.value()); + } + } + break; + case css_consumed_block::parser_tag_type::css_eof_block: + if (state == parse_value) { + ret->add_rule(std::move(cur_rule)); + } + can_continue = false; + break; + default: + can_continue = false; + break; + } + } + + return ret; /* copy elision */ +} + +auto css_declarations_block::merge_block(const 
css_declarations_block &other, merge_type how) -> void +{ + const auto &other_rules = other.get_rules(); + + + for (auto &rule: other_rules) { + auto &&found_it = rules.find(rule); + + if (found_it != rules.end()) { + /* Duplicate, need to merge */ + switch (how) { + case merge_type::merge_override: + /* Override */ + (*found_it)->override_values(*rule); + break; + case merge_type::merge_duplicate: + /* Merge values */ + add_rule(rule); + break; + case merge_type::merge_parent: + /* Do not merge parent rule if more specific local one is presented */ + break; + } + } + else { + /* New property, just insert */ + rules.insert(rule); + } + } +} + +auto css_declarations_block::compile_to_block(rspamd_mempool_t *pool) const -> rspamd::html::html_block * +{ + auto *block = rspamd_mempool_alloc0_type(pool, rspamd::html::html_block); + auto opacity = -1; + const css_rule *font_rule = nullptr, *background_rule = nullptr; + + for (const auto &rule: rules) { + auto prop = rule->get_prop().type; + const auto &vals = rule->get_values(); + + if (vals.empty()) { + continue; + } + + switch (prop) { + case css_property_type::PROPERTY_VISIBILITY: + case css_property_type::PROPERTY_DISPLAY: { + auto disp = vals.back().to_display().value_or(css_display_value::DISPLAY_INLINE); + block->set_display(disp); + break; + } + case css_property_type::PROPERTY_FONT_SIZE: { + auto fs = vals.back().to_dimension(); + if (fs) { + block->set_font_size(fs.value().dim, fs.value().is_percent); + } + } + case css_property_type::PROPERTY_OPACITY: { + opacity = vals.back().to_number().value_or(opacity); + break; + } + case css_property_type::PROPERTY_FONT_COLOR: + case css_property_type::PROPERTY_COLOR: { + auto color = vals.back().to_color(); + if (color) { + block->set_fgcolor(color.value()); + } + break; + } + case css_property_type::PROPERTY_BGCOLOR: { + auto color = vals.back().to_color(); + if (color) { + block->set_bgcolor(color.value()); + } + break; + } + case css_property_type::PROPERTY_HEIGHT: { 
+ auto w = vals.back().to_dimension(); + if (w) { + block->set_width(w.value().dim, w.value().is_percent); + } + break; + } + case css_property_type::PROPERTY_WIDTH: { + auto h = vals.back().to_dimension(); + if (h) { + block->set_width(h.value().dim, h.value().is_percent); + } + break; + } + /* Optional attributes */ + case css_property_type::PROPERTY_FONT: + font_rule = rule.get(); + break; + case css_property_type::PROPERTY_BACKGROUND: + background_rule = rule.get(); + break; + default: + /* Do nothing for now */ + break; + } + } + + /* Optional properties */ + if (!(block->fg_color_mask) && font_rule) { + auto &vals = font_rule->get_values(); + + for (const auto &val: vals) { + auto maybe_color = val.to_color(); + + if (maybe_color) { + block->set_fgcolor(maybe_color.value()); + } + } + } + + if (!(block->font_mask) && font_rule) { + auto &vals = font_rule->get_values(); + + for (const auto &val: vals) { + auto maybe_dim = val.to_dimension(); + + if (maybe_dim) { + block->set_font_size(maybe_dim.value().dim, maybe_dim.value().is_percent); + } + } + } + + if (!(block->bg_color_mask) && background_rule) { + auto &vals = background_rule->get_values(); + + for (const auto &val: vals) { + auto maybe_color = val.to_color(); + + if (maybe_color) { + block->set_bgcolor(maybe_color.value()); + } + } + } + + return block; +} + +void css_rule::add_value(const css_value &value) +{ + values.push_back(value); +} + + +TEST_SUITE("css") +{ + TEST_CASE("simple css rules") + { + const std::vector<std::pair<const char *, std::vector<css_property>>> cases{ + {"font-size:12.0pt;line-height:115%", + {css_property(css_property_type::PROPERTY_FONT_SIZE)}}, + {"font-size:12.0pt;display:none", + {css_property(css_property_type::PROPERTY_FONT_SIZE), + css_property(css_property_type::PROPERTY_DISPLAY)}}}; + + auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "css", 0); + + for (const auto &c: cases) { + auto res = process_declaration_tokens(pool, + 
get_rules_parser_functor(pool, c.first)); + + CHECK(res.get() != nullptr); + + for (auto i = 0; i < c.second.size(); i++) { + CHECK(res->has_property(c.second[i])); + } + } + } +} + +}// namespace rspamd::css
\ No newline at end of file diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx new file mode 100644 index 0000000..114b83e --- /dev/null +++ b/src/libserver/css/css_rule.hxx @@ -0,0 +1,153 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#ifndef RSPAMD_CSS_RULE_HXX +#define RSPAMD_CSS_RULE_HXX + +#include "css_value.hxx" +#include "css_property.hxx" +#include "css_parser.hxx" +#include "contrib/ankerl/unordered_dense.h" +#include "libutil/cxx/util.hxx" +#include "libutil/cxx/hash_util.hxx" +#include <vector> +#include <memory> + +namespace rspamd::html { +/* Forward declaration */ +struct html_block; +}// namespace rspamd::html + +namespace rspamd::css { + +class css_rule { + css_property prop; + using css_values_vec = std::vector<css_value>; + css_values_vec values; + +public: + /* We must create css rule explicitly from a property and values */ + css_rule() = delete; + + css_rule(const css_rule &other) = delete; + + /* Constructors */ + css_rule(css_rule &&other) noexcept = default; + + explicit css_rule(css_property &&prop, css_values_vec &&values) noexcept + : prop(prop), values(std::forward<css_values_vec>(values)) + { + } + + explicit css_rule(const css_property &prop) noexcept + : prop(prop), values{} + { + } + + /* Methods */ + /* Comparison is special, as we care merely about property, not the values */ + auto operator==(const css_rule &other) const + { + 
return prop == other.prop; + } + + constexpr const css_values_vec &get_values(void) const + { + return values; + } + constexpr const css_property &get_prop(void) const + { + return prop; + } + + /* Import values from another rules according to the importance */ + void override_values(const css_rule &other); + void merge_values(const css_rule &other); + void add_value(const css_value &value); +}; + +}// namespace rspamd::css + +/* Make rules hashable by property */ +namespace std { +template<> +class hash<rspamd::css::css_rule> { +public: + using is_avalanching = void; + constexpr auto operator()(const rspamd::css::css_rule &rule) const -> auto + { + return hash<rspamd::css::css_property>()(rule.get_prop()); + } +}; + +}// namespace std + +namespace rspamd::css { + +/** + * Class that is designed to hold css declaration (a set of rules) + */ +class css_declarations_block { +public: + using rule_shared_ptr = std::shared_ptr<css_rule>; + using rule_shared_hash = smart_ptr_hash<css_rule>; + using rule_shared_eq = smart_ptr_equal<css_rule>; + enum class merge_type { + merge_duplicate, + merge_parent, + merge_override + }; + + css_declarations_block() = default; + auto add_rule(rule_shared_ptr rule) -> bool; + auto merge_block(const css_declarations_block &other, + merge_type how = merge_type::merge_duplicate) -> void; + auto get_rules(void) const -> const auto & + { + return rules; + } + + /** + * Returns if a declaration block has some property + * @param prop + * @return + */ + auto has_property(const css_property &prop) const -> bool + { + return (rules.find(css_rule{prop}) != rules.end()); + } + + /** + * Compile CSS declaration to the html block + * @param pool used to carry memory required for html_block + * @return html block structure + */ + auto compile_to_block(rspamd_mempool_t *pool) const -> rspamd::html::html_block *; + +private: + ankerl::unordered_dense::set<rule_shared_ptr, rule_shared_hash, rule_shared_eq> rules; +}; + +using css_declarations_block_ptr 
= std::shared_ptr<css_declarations_block>; + +auto process_declaration_tokens(rspamd_mempool_t *pool, + blocks_gen_functor &&next_token_functor) + -> css_declarations_block_ptr; + +}// namespace rspamd::css + +#endif//RSPAMD_CSS_RULE_HXX
// \ No newline at end of file diff --git a/src/libserver/css/css_rule_parser.rl b/src/libserver/css/css_rule_parser.rl new file mode 100644 index 0000000..e3b1876 --- /dev/null +++ b/src/libserver/css/css_rule_parser.rl @@ -0,0 +1,27 @@
%%{
  machine css_parser;
  alphtype unsigned char;
  include css_syntax "css_syntax.rl";

  main := declaration;
}%%

%% write data;

#include <cstddef>

namespace rspamd::css {

/*
 * Runs the `declaration` state machine over the whole buffer.
 * Returns the final state of the machine (the value of `cs`).
 */
int
foo (const unsigned char *data, std::size_t len)
{
  /*
   * The buffer is always the complete input, so the end-of-file marker
   * is the same as the end of the buffer; it must not be left uninitialised
   * as the generated code may compare `p` against it.
   */
  const unsigned char *p = data, *pe = data + len, *eof = pe;
  int cs;

  %% write init;
  %% write exec;

  return cs;
}

}
\ No newline at end of file diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx new file mode 100644 index 0000000..a62ffff --- /dev/null +++ b/src/libserver/css/css_selector.cxx @@ -0,0 +1,226 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css_selector.hxx" +#include "css.hxx" +#include "libserver/html/html.hxx" +#include "fmt/core.h" +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +namespace rspamd::css { + +auto process_selector_tokens(rspamd_mempool_t *pool, + blocks_gen_functor &&next_token_functor) + -> selectors_vec +{ + selectors_vec ret; + bool can_continue = true; + enum class selector_process_state { + selector_parse_start = 0, + selector_expect_ident, + selector_ident_consumed, + selector_ignore_attribute, + selector_ignore_function, + selector_ignore_combination + } state = selector_process_state::selector_parse_start; + std::unique_ptr<css_selector> cur_selector; + + + while (can_continue) { + const auto &next_tok = next_token_functor(); + + if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) { + const auto &parser_tok = next_tok.get_token_or_empty(); + + if (state == selector_process_state::selector_parse_start) { + /* + * At the beginning of the parsing we can expect either + * delim or an ident, everything else is discarded for now + */ + msg_debug_css("start consume selector"); + + switch 
(parser_tok.type) { + case css_parser_token::token_type::delim_token: { + auto delim_c = parser_tok.get_delim(); + + if (delim_c == '.') { + cur_selector = std::make_unique<css_selector>( + css_selector::selector_type::SELECTOR_CLASS); + state = selector_process_state::selector_expect_ident; + } + else if (delim_c == '#') { + cur_selector = std::make_unique<css_selector>( + css_selector::selector_type::SELECTOR_ID); + state = selector_process_state::selector_expect_ident; + } + else if (delim_c == '*') { + cur_selector = std::make_unique<css_selector>( + css_selector::selector_type::SELECTOR_ALL); + state = selector_process_state::selector_ident_consumed; + } + break; + } + case css_parser_token::token_type::ident_token: { + auto tag_id = html::html_tag_by_name(parser_tok.get_string_or_default("")); + + if (tag_id) { + cur_selector = std::make_unique<css_selector>(tag_id.value()); + } + state = selector_process_state::selector_ident_consumed; + break; + } + case css_parser_token::token_type::hash_token: + cur_selector = std::make_unique<css_selector>( + css_selector::selector_type::SELECTOR_ID); + cur_selector->value = + parser_tok.get_string_or_default(""); + state = selector_process_state::selector_ident_consumed; + break; + default: + msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start", + next_tok.token_type_str()); + can_continue = false; + break; + } + } + else if (state == selector_process_state::selector_expect_ident) { + /* + * We got something like a selector start, so we expect + * a plain ident + */ + if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) { + cur_selector->value = parser_tok.get_string_or_default(""); + state = selector_process_state::selector_ident_consumed; + } + else { + msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident", + next_tok.token_type_str()); + can_continue = false; + } + } + else if (state == 
selector_process_state::selector_ident_consumed) { + if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) { + /* Got full selector, attach it to the vector and go further */ + msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str()); + ret.push_back(std::move(cur_selector)); + state = selector_process_state::selector_parse_start; + } + else if (parser_tok.type == css_parser_token::token_type::semicolon_token) { + /* TODO: implement adjustments */ + state = selector_process_state::selector_ignore_function; + } + else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) { + /* TODO: implement attributes checks */ + state = selector_process_state::selector_ignore_attribute; + } + else { + /* TODO: implement selectors combinations */ + state = selector_process_state::selector_ignore_combination; + } + } + else { + /* Ignore state; ignore all till ',' token or eof token */ + if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) { + /* Got full selector, attach it to the vector and go further */ + ret.push_back(std::move(cur_selector)); + state = selector_process_state::selector_parse_start; + } + else { + auto debug_str = parser_tok.get_string_or_default(""); + msg_debug_css("ignore token %*s", (int) debug_str.size(), + debug_str.data()); + } + } + } + else { + /* End of parsing */ + if (state == selector_process_state::selector_ident_consumed && cur_selector) { + msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str()); + ret.push_back(std::move(cur_selector)); + } + else { + msg_debug_css("not attached selector, state: %d", static_cast<int>(state)); + } + can_continue = false; + } + } + + return ret; /* copy elision */ +} + +auto css_selector::debug_str() const -> std::string +{ + std::string ret; + + if (type == selector_type::SELECTOR_ID) { + ret += "#"; + } + else if (type == selector_type::SELECTOR_CLASS) { + ret += "."; + } + else if (type == 
selector_type::SELECTOR_ALL) { + ret = "*"; + + return ret; + } + + std::visit([&](auto arg) -> void { + using T = std::decay_t<decltype(arg)>; + + if constexpr (std::is_same_v<T, tag_id_t>) { + ret += fmt::format("tag: {}", static_cast<int>(arg)); + } + else { + ret += arg; + } + }, + value); + + return ret; +} + +TEST_SUITE("css") +{ + TEST_CASE("simple css selectors") + { + const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{ + {"em", {css_selector::selector_type::SELECTOR_TAG}}, + {"*", {css_selector::selector_type::SELECTOR_ALL}}, + {".class", {css_selector::selector_type::SELECTOR_CLASS}}, + {"#id", {css_selector::selector_type::SELECTOR_ID}}, + {"em,.class,#id", {css_selector::selector_type::SELECTOR_TAG, css_selector::selector_type::SELECTOR_CLASS, css_selector::selector_type::SELECTOR_ID}}, + }; + + auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), + "css", 0); + + for (const auto &c: cases) { + auto res = process_selector_tokens(pool, + get_selectors_parser_functor(pool, c.first)); + + CHECK(c.second.size() == res.size()); + + for (auto i = 0; i < c.second.size(); i++) { + CHECK(res[i]->type == c.second[i]); + } + } + + rspamd_mempool_delete(pool); + } +} + +}// namespace rspamd::css diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx new file mode 100644 index 0000000..65b185a --- /dev/null +++ b/src/libserver/css/css_selector.hxx @@ -0,0 +1,134 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_CSS_SELECTOR_HXX +#define RSPAMD_CSS_SELECTOR_HXX + +#include <variant> +#include <string> +#include <optional> +#include <vector> +#include <memory> + +#include "function2/function2.hpp" +#include "parse_error.hxx" +#include "css_parser.hxx" +#include "libserver/html/html_tags.h" +#include "libcryptobox/cryptobox.h" + +namespace rspamd::css { + +/* + * Holds a value for css selector, internal is handled by variant + */ +struct css_selector { + enum class selector_type { + SELECTOR_TAG, /* e.g. tr, for this value we use tag_id_t */ + SELECTOR_CLASS, /* generic class, e.g. .class */ + SELECTOR_ID, /* e.g. #id */ + SELECTOR_ALL /* * selector */ + }; + + selector_type type; + std::variant<tag_id_t, std::string_view> value; + + /* Conditions for the css selector */ + /* Dependency on attributes */ + struct css_attribute_condition { + std::string_view attribute; + std::string_view op = ""; + std::string_view value = ""; + }; + + /* General dependency chain */ + using css_selector_ptr = std::unique_ptr<css_selector>; + using css_selector_dep = std::variant<css_attribute_condition, css_selector_ptr>; + std::vector<css_selector_dep> dependencies; + + auto to_tag(void) const -> std::optional<tag_id_t> + { + if (type == selector_type::SELECTOR_TAG) { + return std::get<tag_id_t>(value); + } + return std::nullopt; + } + + auto to_string(void) const -> std::optional<const std::string_view> + { + if (type != selector_type::SELECTOR_TAG) { + return std::string_view(std::get<std::string_view>(value)); + } + return std::nullopt; + }; + + explicit css_selector(selector_type t) + : type(t) + { + } + explicit css_selector(tag_id_t t) + : type(selector_type::SELECTOR_TAG) + { + value = t; + } + explicit css_selector(const std::string_view &st, selector_type t = selector_type::SELECTOR_ID) + : type(t) + { + value = st; + } + + auto 
operator==(const css_selector &other) const -> bool + { + return type == other.type && value == other.value; + } + + auto debug_str(void) const -> std::string; +}; + + +using selectors_vec = std::vector<std::unique_ptr<css_selector>>; + +/* + * Consume selectors token and split them to the list of selectors + */ +auto process_selector_tokens(rspamd_mempool_t *pool, + blocks_gen_functor &&next_token_functor) + -> selectors_vec; + +}// namespace rspamd::css + +/* Selectors hashing */ +namespace std { +template<> +class hash<rspamd::css::css_selector> { +public: + using is_avalanching = void; + auto operator()(const rspamd::css::css_selector &sel) const -> std::size_t + { + if (sel.type == rspamd::css::css_selector::selector_type::SELECTOR_TAG) { + return static_cast<std::size_t>(std::get<tag_id_t>(sel.value)); + } + else { + const auto &sv = std::get<std::string_view>(sel.value); + + return rspamd_cryptobox_fast_hash(sv.data(), sv.size(), 0xdeadbabe); + } + } +}; +}// namespace std + +#endif//RSPAMD_CSS_SELECTOR_HXX diff --git a/src/libserver/css/css_selector_parser.rl b/src/libserver/css/css_selector_parser.rl new file mode 100644 index 0000000..f5ae936 --- /dev/null +++ b/src/libserver/css/css_selector_parser.rl @@ -0,0 +1,27 @@ +%%{ + machine css_parser; + alphtype unsigned char; + include css_syntax "css_syntax.rl"; + + main := selectors_group; +}%% + +%% write data; + +#include <cstddef> + +namespace rspamd::css { + +int +parse_css_selector (const unsigned char *data, std::size_t len) +{ + const unsigned char *p = data, *pe = data + len, *eof; + int cs; + + %% write init; + %% write exec; + + return cs; +} + +}
\ No newline at end of file diff --git a/src/libserver/css/css_style.hxx b/src/libserver/css/css_style.hxx new file mode 100644 index 0000000..429e58f --- /dev/null +++ b/src/libserver/css/css_style.hxx @@ -0,0 +1,66 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_CSS_STYLE_HXX +#define RSPAMD_CSS_STYLE_HXX + +#include <memory> +#include <vector> +#include "css_rule.hxx" +#include "css_selector.hxx" + +namespace rspamd::css { + +/* + * Full CSS style representation + */ +class css_style { +public: + /* Make class trivial */ + css_style(const css_style &other) = default; + + css_style(const std::shared_ptr<css_style> &_parent) + : parent(_parent) + { + propagate_from_parent(); + } + css_style(const std::shared_ptr<css_style> &_parent, + const std::vector<std::shared_ptr<css_selector>> &_selectors) + : parent(_parent) + { + selectors.reserve(_selectors.size()); + + for (const auto &sel_ptr: _selectors) { + selectors.emplace_back(sel_ptr); + } + + propagate_from_parent(); + } + +private: + std::vector<std::weak_ptr<css_selector>> selectors; + std::weak_ptr<css_style> parent; + std::vector<css_rule> rules; + +private: + void propagate_from_parent(void); /* Construct full style using parent */ +}; + +}// namespace rspamd::css + +#endif//RSPAMD_CSS_STYLE_HXX diff --git a/src/libserver/css/css_syntax.rl b/src/libserver/css/css_syntax.rl new file mode 100644 index 0000000..93da44b 
--- /dev/null +++ b/src/libserver/css/css_syntax.rl @@ -0,0 +1,110 @@
%%{
  # CSS3 EBNF derived
  # This machine only defines named expressions; it is expected to be pulled
  # with `include css_syntax "css_syntax.rl";` into a machine that declares
  # `alphtype unsigned char` (byte ranges up to 0xFF are used below).
  machine css_syntax;

  # Primitive Atoms
  COMMENT = (
    '/*' ( any )* :>> '*/'
  );
  # A string is closed by the same quote character that has opened it;
  # both "double" and 'single' quoted forms are accepted
  QUOTED_STRING = ( '"' ( [^"\\] | /\\./ )* '"' )
                | ( "'" ( [^'\\] | /\\./ )* "'" );
  BARE_URL_CHARS = ((0x21
                    | 0x23..0x26
                    | 0x2A..0xFF)+);
  BARE_URL = BARE_URL_CHARS;
  URL = 'url(' ( QUOTED_STRING | space* BARE_URL space* ) ')';
  # Any byte outside of the ASCII range. Must be a numeric range: inside of
  # an or-expression ([...]) `0x00-0x7F` is a set of literal characters,
  # not a pair of numbers
  nonascii = 0x80..0xFF;
  nmstart = ([_a-zA-Z] | nonascii);
  nmchar = ([_a-zA-Z0-9] | 0x2D | nonascii);
  name = nmchar+;
  num = ([0-9]+ | ([0-9]* '.' [0-9]+));
  CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
  IDENT = ([\-]? nmstart nmchar*);
  ATTR = 'attr(' IDENT ')';

  DIMENSION = '-'? num space? ( 'ch' | 'cm' | 'em' | 'ex' | 'fr' | 'in' | 'mm' | 'pc' | 'pt' | 'px' | 'Q' | 'rem' | 'vh' | 'vmax' | 'vmin' | 'vw' | 'dpi' );
  NUMBER = '-'? num;
  HASH = '#' name;
  HEX = '#' [0-9a-fA-F]{1,6};
  PERCENTAGE = '-'? num '%';
  INCLUDES = '~=';
  DASHMATCH = '|=';
  PREFIXMATCH = '^=';
  SUFFIXMATCH = '$=';
  SUBSTRINGMATCH = '*=';
  PLUS = '+';
  GREATER = '>';
  COMMA = ',';
  TILDE = '~';
  S = space;

  # Property name
  property = ( QUOTED_STRING | IDENT );

  # Values
  important = space* '!' space* 'important';
  expression = ( ( '+' | PERCENTAGE | URL | ATTR | HEX | '-' | DIMENSION | NUMBER | QUOTED_STRING | IDENT | ',') S* )+;
  functional_pseudo = (IDENT - ('attr'|'url')) '(' space* expression? ')';
  value = ( URL | ATTR | PLUS | HEX | PERCENTAGE | '-' | DIMENSION | NUMBER | QUOTED_STRING | IDENT | functional_pseudo);
  values = value (space value | '/' value )* ( space* ',' space* value (space value | '/' value )* )* important?;

  # Declaration definition
  declaration = (property space? ':' (property ':')* space? values);

  # Selectors
  class = '.' IDENT;
  element_name = IDENT;
  namespace_prefix = ( IDENT | '*' )? '|';
  type_selector = namespace_prefix? element_name;
  universal = namespace_prefix? '*';
  attrib = '[' space* namespace_prefix? IDENT space* ( ( PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | '=' | INCLUDES | DASHMATCH ) space* ( IDENT | QUOTED_STRING ) space* )? ']';
  pseudo = ':' ':'? ( IDENT | functional_pseudo );
  atrule = '@' IDENT;
  mediaquery_selector = '(' declaration ')';
  negation_arg = type_selector
                 | universal
                 | HASH
                 | class
                 | attrib
                 | pseudo;
  # `:not(` is a single token in the selectors grammar. The alternatives are
  # parenthesised because `|` binds weaker than concatenation
  negation = ':' ( 'NOT' | 'not' ) '(' space* negation_arg space* ')';
  # Haha, so simple...
  # there should be also mediaquery_selector but it makes grammar too large, so rip it off
  simple_selector_sequence = ( type_selector | universal ) ( HASH | class | attrib | pseudo | negation | atrule )*
                             | ( HASH | class | attrib | pseudo | negation | atrule )+;
  combinator = space* PLUS space*
               | space* GREATER space*
               | space* TILDE space*
               | space+;
  # Combine simple stuff and obtain just... an ordinary selector, bingo
  selector = simple_selector_sequence ( combinator simple_selector_sequence )*;
  # Multiple beasts
  selectors_group = selector ( COMMENT? ',' space* selector )*;

  # Rules
  # This is mostly used stuff
  rule = selectors_group space? "{" space*
         (COMMENT? space* declaration ( space? ";" space? declaration?)* ";"? space?)* COMMENT* space* '}';
  query_declaration = rule;

  # Areas used in css
  arearule = '@'('bottom-left'|'bottom-right'|'top-left'|'top-right');
  areaquery = arearule space? '{' space* (COMMENT? space* declaration ( S? ';' S? declaration?)* ';'? space?)* COMMENT* space* '}';
  # Printed media stuff, useless but we have to parse it :(
  printcssrule = '@media print';
  pagearea = ':'('left'|'right');
  pagerule = '@page' space? pagearea?;
  pagequery = pagerule space? '{' space* (areaquery| (COMMENT? space* declaration ( space? ';' space? declaration?)* ';'? S?)*) COMMENT* space* '}';
  printcssquery = printcssrule S? '{' ( S? COMMENT* S? (pagequery| COMMENT|query_declaration) S*)* S? '}';
  # Something that defines media
  conditions = ('and'|'screen'|'or'|'only'|'not'|'amzn-mobi'|'amzn-kf8'|'amzn-mobi7'|',');
  mediarule = '@media' space conditions ( space? conditions| space? mediaquery_selector )*;
  mediaquery = mediarule space? '{' ( space? COMMENT* query_declaration)* S? '}';

  simple_atrule = ("@charset"|"@namespace") space+ QUOTED_STRING space* ";";

  import_rule = "@import" space+ ( QUOTED_STRING | URL ) space* ";";

  # Final css definition
  css_style = space* ( ( rule | simple_atrule | import_rule | mediaquery | printcssquery | COMMENT) space* )*;

}%%
\ No newline at end of file diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx new file mode 100644 index 0000000..6d3f41e --- /dev/null +++ b/src/libserver/css/css_tokeniser.cxx @@ -0,0 +1,836 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css_tokeniser.hxx" +#include "css_util.hxx" +#include "css.hxx" +#include "frozen/unordered_map.h" +#include "frozen/string.h" +#include <string> +#include <cmath> + +namespace rspamd::css { + +/* Helpers to create tokens */ + +/* + * This helper is intended to create tokens either with a tag and value + * or with just a tag. 
+ */ +template<css_parser_token::token_type T, class Arg> +auto make_token(const Arg &arg) -> css_parser_token; + +template<> +auto make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::string_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::ident_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::ident_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::function_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::function_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::url_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::url_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &s) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::whitespace_token, s}; +} + +template<> +auto make_token<css_parser_token::token_type::delim_token, char>(const char &c) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::delim_token, c}; +} + +template<> +auto make_token<css_parser_token::token_type::number_token, float>(const float &d) + -> css_parser_token +{ + return css_parser_token{css_parser_token::token_type::number_token, d}; +} + +/* + * Generic tokens with no value (non-terminals) + */ +template<css_parser_token::token_type T> +auto make_token(void) -> css_parser_token +{ + return css_parser_token{T, css_parser_token_placeholder()}; +} + +static constexpr inline auto is_plain_ident_start(char c) -> bool +{ + if ((c & 0x80) || g_ascii_isalpha(c) || c == '_') { + return true; + } + + return false; 
+}; + +static constexpr inline auto is_plain_ident(char c) -> bool +{ + if (is_plain_ident_start(c) || c == '-' || g_ascii_isdigit(c)) { + return true; + } + + return false; +}; + +struct css_dimension_data { + css_parser_token::dim_type dtype; + double mult; +}; + +/* + * Maps from css dimensions to the multipliers that look reasonable in email + */ +constexpr const auto max_dims = static_cast<int>(css_parser_token::dim_type::dim_max); +constexpr frozen::unordered_map<frozen::string, css_dimension_data, max_dims> dimensions_map{ + {"px", {css_parser_token::dim_type::dim_px, 1.0}}, + /* EM/REM are 16 px, so multiply and round */ + {"em", {css_parser_token::dim_type::dim_em, 16.0}}, + {"rem", {css_parser_token::dim_type::dim_rem, 16.0}}, + /* + * Represents the x-height of the element's font. + * On fonts with the "x" letter, this is generally the height + * of lowercase letters in the font; 1ex = 0.5em in many fonts. + */ + {"ex", {css_parser_token::dim_type::dim_ex, 8.0}}, + {"wv", {css_parser_token::dim_type::dim_wv, 8.0}}, + {"wh", {css_parser_token::dim_type::dim_wh, 6.0}}, + {"vmax", {css_parser_token::dim_type::dim_vmax, 8.0}}, + {"vmin", {css_parser_token::dim_type::dim_vmin, 6.0}}, + /* One point. 1pt = 1/72nd of 1in */ + {"pt", {css_parser_token::dim_type::dim_pt, 96.0 / 72.0}}, + /* 96px/2.54 */ + {"cm", {css_parser_token::dim_type::dim_cm, 96.0 / 2.54}}, + {"mm", {css_parser_token::dim_type::dim_mm, 9.60 / 2.54}}, + {"in", {css_parser_token::dim_type::dim_in, 96.0}}, + /* 1pc = 12pt = 1/6th of 1in. 
*/ + {"pc", {css_parser_token::dim_type::dim_pc, 96.0 / 6.0}}}; + +auto css_parser_token::adjust_dim(const css_parser_token &dim_token) -> bool +{ + if (!std::holds_alternative<float>(value) || + !std::holds_alternative<std::string_view>(dim_token.value)) { + /* Invalid tokens */ + return false; + } + + auto num = std::get<float>(value); + auto sv = std::get<std::string_view>(dim_token.value); + + auto dim_found = find_map(dimensions_map, sv); + + if (dim_found) { + auto dim_elt = dim_found.value().get(); + dimension_type = dim_elt.dtype; + flags |= css_parser_token::number_dimension; + num *= dim_elt.mult; + } + else { + flags |= css_parser_token::flag_bad_dimension; + + return false; + } + + value = num; + + return true; +} + + +/* + * Consume functions: return a token and advance lexer offset + */ +auto css_tokeniser::consume_ident(bool allow_number) -> struct css_parser_token { + auto i = offset; + auto need_escape = false; + auto allow_middle_minus = false; + + auto maybe_escape_sv = [&](auto cur_pos, auto tok_type) -> auto { + if (need_escape) { + auto escaped = rspamd::css::unescape_css(pool, {&input[offset], + cur_pos - offset}); + offset = cur_pos; + + return css_parser_token{tok_type, escaped}; + } + + auto result = std::string_view{&input[offset], cur_pos - offset}; + offset = cur_pos; + + return css_parser_token{tok_type, result}; + }; + + /* Ident token can start from `-` or `--` */ + if (input[i] == '-') { + i++; + + if (i < input.size() && input[i] == '-') { + i++; + allow_middle_minus = true; + } + } + + while (i < input.size()) { + auto c = input[i]; + + auto is_plain_c = (allow_number || allow_middle_minus) ? 
is_plain_ident(c) : is_plain_ident_start(c); + if (!is_plain_c) { + if (c == '\\' && i + 1 < input.size()) { + /* Escape token */ + need_escape = true; + auto nhex = 0; + + /* Need to find an escape end */ + do { + c = input[++i]; + if (g_ascii_isxdigit(c)) { + nhex++; + + if (nhex > 6) { + /* End of the escape */ + break; + } + } + else if (nhex > 0 && c == ' ') { + /* \[hex]{1,6} */ + i++; /* Skip one space */ + break; + } + else { + /* Single \ + char */ + break; + } + } while (i < input.size()); + } + else if (c == '(') { + /* Function or url token */ + auto j = i + 1; + + while (j < input.size() && g_ascii_isspace(input[j])) { + j++; + } + + if (input.size() - offset > 3 && input.substr(offset, 3) == "url") { + if (j < input.size() && (input[j] == '"' || input[j] == '\'')) { + /* Function token */ + auto ret = maybe_escape_sv(i, + css_parser_token::token_type::function_token); + return ret; + } + else { + /* Consume URL token */ + while (j < input.size() && input[j] != ')') { + j++; + } + + if (j < input.size() && input[j] == ')') { + /* Valid url token */ + auto ret = maybe_escape_sv(j + 1, + css_parser_token::token_type::url_token); + return ret; + } + else { + /* Incomplete url token */ + auto ret = maybe_escape_sv(j, + css_parser_token::token_type::url_token); + + ret.flags |= css_parser_token::flag_bad_string; + return ret; + } + } + } + else { + auto ret = maybe_escape_sv(i, + css_parser_token::token_type::function_token); + return ret; + } + } + else if (c == '-' && allow_middle_minus) { + i++; + continue; + } + else { + break; /* Not an ident token */ + } + } /* !plain ident */ + else { + allow_middle_minus = true; + } + + i++; + } + + return maybe_escape_sv(i, css_parser_token::token_type::ident_token); +} + +auto +css_tokeniser::consume_number() -> struct css_parser_token { + auto i = offset; + auto seen_dot = false, seen_exp = false; + + if (input[i] == '-' || input[i] == '+') { + i++; + } + if (input[i] == '.' 
&& i < input.size()) { + seen_dot = true; + i++; + } + + while (i < input.size()) { + auto c = input[i]; + + if (!g_ascii_isdigit(c)) { + if (c == '.') { + if (!seen_dot) { + seen_dot = true; + } + else { + break; + } + } + else if (c == 'e' || c == 'E') { + if (!seen_exp) { + seen_exp = true; + seen_dot = true; /* dots are not allowed after e */ + + if (i + 1 < input.size()) { + auto next_c = input[i + 1]; + if (next_c == '+' || next_c == '-') { + i++; + } + else if (!g_ascii_isdigit(next_c)) { + /* Not an exponent */ + break; + } + } + else { + /* Not an exponent */ + break; + } + } + else { + break; + } + } + else { + break; + } + } + + i++; + } + + if (i > offset) { + /* I wish it was supported properly */ + //auto conv_res = std::from_chars(&input[offset], &input[i], num); + char numbuf[128], *endptr = nullptr; + rspamd_strlcpy(numbuf, &input[offset], MIN(i - offset + 1, sizeof(numbuf))); + auto num = g_ascii_strtod(numbuf, &endptr); + offset = i; + + if (fabs(num) >= G_MAXFLOAT || std::isnan(num)) { + msg_debug_css("invalid number: %s", numbuf); + return make_token<css_parser_token::token_type::delim_token>(input[i - 1]); + } + else { + + auto ret = make_token<css_parser_token::token_type::number_token>(static_cast<float>(num)); + + if (i < input.size()) { + if (input[i] == '%') { + ret.flags |= css_parser_token::number_percent; + i++; + + offset = i; + } + else if (is_plain_ident_start(input[i])) { + auto dim_token = consume_ident(); + + if (dim_token.type == css_parser_token::token_type::ident_token) { + if (!ret.adjust_dim(dim_token)) { + auto sv = std::get<std::string_view>(dim_token.value); + msg_debug_css("cannot apply dimension from the token %*s; number value = %.1f", + (int) sv.size(), sv.begin(), num); + /* Unconsume ident */ + offset = i; + } + } + else { + /* We have no option but to uncosume ident token in this case */ + msg_debug_css("got invalid ident like token after number, unconsume it"); + } + } + else { + /* Plain number, nothing to do */ 
+ } + } + + return ret; + } + } + else { + msg_err_css("internal error: invalid number, empty token"); + i++; + } + + offset = i; + /* Should not happen */ + return make_token<css_parser_token::token_type::delim_token>(input[i - 1]); +} + +/* + * Main routine to produce lexer tokens + */ +auto +css_tokeniser::next_token(void) -> struct css_parser_token { + /* Check pushback queue */ + if (!backlog.empty()) { + auto tok = backlog.front(); + backlog.pop_front(); + + return tok; + } + /* Helpers */ + + /* + * This lambda eats comment handling nested comments; + * offset is set to the next character after a comment (or eof) + * Nothing is returned + */ + auto consume_comment = [this]() { + auto i = offset; + auto nested = 0; + + if (input.empty()) { + /* Nothing to consume */ + return; + } + + /* We handle nested comments just because they can exist... */ + while (i < input.size() - 1) { + auto c = input[i]; + if (c == '*' && input[i + 1] == '/') { + if (nested == 0) { + offset = i + 2; + return; + } + else { + nested--; + i += 2; + continue; + } + } + else if (c == '/' && input[i + 1] == '*') { + nested++; + i += 2; + continue; + } + + i++; + } + + offset = i; + }; + + /* + * Consume quoted string, returns a string_view over a string, offset + * is set one character after the string. 
Css unescaping is done automatically + * Accepts a quote char to find end of string + */ + auto consume_string = [this](auto quote_char) -> auto { + auto i = offset; + bool need_unescape = false; + + while (i < input.size()) { + auto c = input[i]; + + if (c == '\\') { + if (i + 1 < input.size()) { + need_unescape = true; + } + else { + /* \ at the end -> ignore */ + } + } + else if (c == quote_char) { + /* End of string */ + std::string_view res{&input[offset], i - offset}; + + if (need_unescape) { + res = rspamd::css::unescape_css(pool, res); + } + + offset = i + 1; + + return res; + } + else if (c == '\n') { + /* Should be a error, but we ignore it for now */ + } + + i++; + } + + /* EOF with no quote character, consider it fine */ + std::string_view res{&input[offset], i - offset}; + + if (need_unescape) { + res = rspamd::css::unescape_css(pool, res); + } + + offset = i; + + return res; + }; + + /* Main tokenisation loop */ + for (auto i = offset; i < input.size(); ++i) { + auto c = input[i]; + + switch (c) { + case '/': + if (i + 1 < input.size() && input[i + 1] == '*') { + offset = i + 2; + consume_comment(); /* Consume comment and go forward */ + return next_token(); /* Tail call */ + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + break; + case ' ': + case '\t': + case '\n': + case '\r': + case '\f': { + /* Consume as much space as we can */ + while (i < input.size() && g_ascii_isspace(input[i])) { + i++; + } + + auto ret = make_token<css_parser_token::token_type::whitespace_token>( + std::string_view(&input[offset], i - offset)); + offset = i; + return ret; + } + case '"': + case '\'': + offset = i + 1; + if (offset < input.size()) { + return make_token<css_parser_token::token_type::string_token>(consume_string(c)); + } + else { + /* Unpaired quote at the end of the rule */ + return make_token<css_parser_token::token_type::delim_token>(c); + } + case '(': + offset = i + 1; + return 
make_token<css_parser_token::token_type::obrace_token>(); + case ')': + offset = i + 1; + return make_token<css_parser_token::token_type::ebrace_token>(); + case '[': + offset = i + 1; + return make_token<css_parser_token::token_type::osqbrace_token>(); + case ']': + offset = i + 1; + return make_token<css_parser_token::token_type::esqbrace_token>(); + case '{': + offset = i + 1; + return make_token<css_parser_token::token_type::ocurlbrace_token>(); + case '}': + offset = i + 1; + return make_token<css_parser_token::token_type::ecurlbrace_token>(); + case ',': + offset = i + 1; + return make_token<css_parser_token::token_type::comma_token>(); + case ';': + offset = i + 1; + return make_token<css_parser_token::token_type::semicolon_token>(); + case ':': + offset = i + 1; + return make_token<css_parser_token::token_type::colon_token>(); + case '<': + /* Maybe an xml like comment */ + if (i + 3 < input.size() && input[i + 1] == '!' && input[i + 2] == '-' && input[i + 3] == '-') { + offset += 3; + + return make_token<css_parser_token::token_type::cdo_token>(); + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + break; + case '-': + if (i + 1 < input.size()) { + auto next_c = input[i + 1]; + + if (g_ascii_isdigit(next_c)) { + /* negative number */ + return consume_number(); + } + else if (next_c == '-') { + if (i + 2 < input.size() && input[i + 2] == '>') { + /* XML like comment */ + offset += 3; + + return make_token<css_parser_token::token_type::cdc_token>(); + } + } + } + /* No other options, a delimiter - */ + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + + break; + case '+': + case '.': + /* Maybe number */ + if (i + 1 < input.size()) { + auto next_c = input[i + 1]; + + if (g_ascii_isdigit(next_c)) { + /* Numeric token */ + return consume_number(); + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + } + /* No other options, 
a delimiter - */ + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + + break; + case '\\': + if (i + 1 < input.size()) { + if (input[i + 1] == '\n' || input[i + 1] == '\r') { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + else { + /* Valid escape, assume ident */ + return consume_ident(); + } + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + break; + case '@': + if (i + 3 < input.size()) { + if (is_plain_ident_start(input[i + 1]) && + is_plain_ident(input[i + 2]) && is_plain_ident(input[i + 3])) { + offset = i + 1; + auto ident_token = consume_ident(); + + if (ident_token.type == css_parser_token::token_type::ident_token) { + /* Update type */ + ident_token.type = css_parser_token::token_type::at_keyword_token; + } + + return ident_token; + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + break; + case '#': + /* TODO: make it more conformant */ + if (i + 2 < input.size()) { + auto next_c = input[i + 1], next_next_c = input[i + 2]; + if ((is_plain_ident(next_c) || next_c == '-') && + (is_plain_ident(next_next_c) || next_next_c == '-')) { + offset = i + 1; + /* We consume indent, but we allow numbers there */ + auto ident_token = consume_ident(true); + + if (ident_token.type == css_parser_token::token_type::ident_token) { + /* Update type */ + ident_token.type = css_parser_token::token_type::hash_token; + } + + return ident_token; + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + break; + default: + /* Generic parsing code */ + + if (g_ascii_isdigit(c)) { + return consume_number(); + } + else if (is_plain_ident_start(c)) { + return 
consume_ident(); + } + else { + offset = i + 1; + return make_token<css_parser_token::token_type::delim_token>(c); + } + break; + } + } + + return make_token<css_parser_token::token_type::eof_token>(); +} + +constexpr auto +css_parser_token::get_token_type() -> const char * +{ + const char *ret = "unknown"; + + switch (type) { + case token_type::whitespace_token: + ret = "whitespace"; + break; + case token_type::ident_token: + ret = "ident"; + break; + case token_type::function_token: + ret = "function"; + break; + case token_type::at_keyword_token: + ret = "atkeyword"; + break; + case token_type::hash_token: + ret = "hash"; + break; + case token_type::string_token: + ret = "string"; + break; + case token_type::number_token: + ret = "number"; + break; + case token_type::url_token: + ret = "url"; + break; + case token_type::cdo_token: /* xml open comment */ + ret = "cdo"; + break; + case token_type::cdc_token: /* xml close comment */ + ret = "cdc"; + break; + case token_type::delim_token: + ret = "delim"; + break; + case token_type::obrace_token: /* ( */ + ret = "obrace"; + break; + case token_type::ebrace_token: /* ) */ + ret = "ebrace"; + break; + case token_type::osqbrace_token: /* [ */ + ret = "osqbrace"; + break; + case token_type::esqbrace_token: /* ] */ + ret = "esqbrace"; + break; + case token_type::ocurlbrace_token: /* { */ + ret = "ocurlbrace"; + break; + case token_type::ecurlbrace_token: /* } */ + ret = "ecurlbrace"; + break; + case token_type::comma_token: + ret = "comma"; + break; + case token_type::colon_token: + ret = "colon"; + break; + case token_type::semicolon_token: + ret = "semicolon"; + break; + case token_type::eof_token: + ret = "eof"; + break; + } + + return ret; +} + + +auto css_parser_token::debug_token_str() -> std::string +{ + const auto *token_type_str = get_token_type(); + std::string ret = token_type_str; + + std::visit([&](auto arg) -> auto { + using T = std::decay_t<decltype(arg)>; + + if constexpr (std::is_same_v<T, 
std::string_view> || std::is_same_v<T, char>) { + ret += "; value="; + ret += arg; + } + else if constexpr (std::is_same_v<T, double>) { + ret += "; value="; + ret += std::to_string(arg); + } + }, + value); + + if ((flags & (~number_dimension)) != default_flags) { + ret += "; flags=" + std::to_string(flags); + } + + if (flags & number_dimension) { + ret += "; dim=" + std::to_string(static_cast<int>(dimension_type)); + } + + return ret; /* Copy elision */ +} + +}// namespace rspamd::css
\ No newline at end of file diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx new file mode 100644 index 0000000..aa6a1a7 --- /dev/null +++ b/src/libserver/css/css_tokeniser.hxx @@ -0,0 +1,215 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_CSS_TOKENISER_HXX +#define RSPAMD_CSS_TOKENISER_HXX + +#include <string_view> +#include <utility> +#include <variant> +#include <list> +#include <functional> +#include <cstdint> +#include "mem_pool.h" + +namespace rspamd::css { + +struct css_parser_token_placeholder {}; /* For empty tokens */ + +struct css_parser_token { + + enum class token_type : std::uint8_t { + whitespace_token, + ident_token, + function_token, + at_keyword_token, + hash_token, + string_token, + number_token, + url_token, + cdo_token, /* xml open comment */ + cdc_token, /* xml close comment */ + delim_token, + obrace_token, /* ( */ + ebrace_token, /* ) */ + osqbrace_token, /* [ */ + esqbrace_token, /* ] */ + ocurlbrace_token, /* { */ + ecurlbrace_token, /* } */ + comma_token, + colon_token, + semicolon_token, + eof_token, + }; + + enum class dim_type : std::uint8_t { + dim_px = 0, + dim_em, + dim_rem, + dim_ex, + dim_wv, + dim_wh, + dim_vmax, + dim_vmin, + dim_pt, + dim_cm, + dim_mm, + dim_in, + dim_pc, + dim_max, + }; + + static const std::uint8_t default_flags = 0; + static const std::uint8_t flag_bad_string = (1u << 0u); + static 
const std::uint8_t number_dimension = (1u << 1u); + static const std::uint8_t number_percent = (1u << 2u); + static const std::uint8_t flag_bad_dimension = (1u << 3u); + + using value_type = std::variant<std::string_view, /* For strings and string like tokens */ + char, /* For delimiters (might need to move to unicode point) */ + float, /* For numeric stuff */ + css_parser_token_placeholder /* For general no token stuff */ + >; + + /* Typed storage */ + value_type value; + + int lineno; + + token_type type; + std::uint8_t flags = default_flags; + dim_type dimension_type; + + css_parser_token() = delete; + explicit css_parser_token(token_type type, const value_type &value) + : value(value), type(type) + { + } + css_parser_token(css_parser_token &&other) = default; + css_parser_token(const css_parser_token &token) = default; + auto operator=(css_parser_token &&other) -> css_parser_token & = default; + auto adjust_dim(const css_parser_token &dim_token) -> bool; + + auto get_string_or_default(const std::string_view &def) const -> std::string_view + { + if (std::holds_alternative<std::string_view>(value)) { + return std::get<std::string_view>(value); + } + else if (std::holds_alternative<char>(value)) { + return std::string_view(&std::get<char>(value), 1); + } + + return def; + } + + auto get_delim() const -> char + { + if (std::holds_alternative<char>(value)) { + return std::get<char>(value); + } + + return (char) -1; + } + + auto get_number_or_default(float def) const -> float + { + if (std::holds_alternative<float>(value)) { + auto dbl = std::get<float>(value); + + if (flags & css_parser_token::number_percent) { + dbl /= 100.0; + } + + return dbl; + } + + return def; + } + + auto get_normal_number_or_default(float def) const -> float + { + if (std::holds_alternative<float>(value)) { + auto dbl = std::get<float>(value); + + if (flags & css_parser_token::number_percent) { + dbl /= 100.0; + } + + if (dbl < 0) { + return 0.0; + } + else if (dbl > 1.0) { + return 1.0; + } 
+ + return dbl; + } + + return def; + } + + /* Debugging routines */ + constexpr auto get_token_type() -> const char *; + /* This function might be slow */ + auto debug_token_str() -> std::string; +}; + +static auto css_parser_eof_token(void) -> const css_parser_token & +{ + static css_parser_token eof_tok{ + css_parser_token::token_type::eof_token, + css_parser_token_placeholder()}; + + return eof_tok; +} + +/* Ensure that parser tokens are simple enough */ +/* + * compiler must implement P0602 "variant and optional should propagate copy/move triviality" + * This is broken on gcc < 8! + */ +static_assert(std::is_trivially_copyable_v<css_parser_token>); + +class css_tokeniser { +public: + css_tokeniser() = delete; + css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) + : input(sv), offset(0), pool(pool) + { + } + + auto next_token(void) -> struct css_parser_token; + auto pushback_token(const struct css_parser_token &t) const -> void + { + backlog.push_back(t); + } + +private: + std::string_view input; + std::size_t offset; + rspamd_mempool_t *pool; + mutable std::list<css_parser_token> backlog; + + auto consume_number() -> struct css_parser_token; + auto consume_ident(bool allow_number = false) -> struct css_parser_token; +}; + +}// namespace rspamd::css + + +#endif//RSPAMD_CSS_TOKENISER_HXX diff --git a/src/libserver/css/css_util.cxx b/src/libserver/css/css_util.cxx new file mode 100644 index 0000000..07f8722 --- /dev/null +++ b/src/libserver/css/css_util.cxx @@ -0,0 +1,157 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css_util.hxx" +#include "css.hxx" +#include <unicode/utf8.h> + +namespace rspamd::css { + +std::string_view unescape_css(rspamd_mempool_t *pool, + const std::string_view &sv) +{ + auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, sv.length())); + auto *d = nspace; + auto nleft = sv.length(); + + enum { + normal = 0, + quoted, + escape, + skip_spaces, + } state = normal; + + char quote_char, prev_c = 0; + auto escape_offset = 0, i = 0; + +#define MAYBE_CONSUME_CHAR(c) \ + do { \ + if ((c) == '"' || (c) == '\'') { \ + state = quoted; \ + quote_char = (c); \ + nleft--; \ + *d++ = (c); \ + } \ + else if ((c) == '\\') { \ + escape_offset = i; \ + state = escape; \ + } \ + else { \ + state = normal; \ + nleft--; \ + *d++ = g_ascii_tolower(c); \ + } \ + } while (0) + + for (const auto c: sv) { + if (nleft == 0) { + msg_err_css("cannot unescape css: truncated buffer of size %d", + (int) sv.length()); + break; + } + switch (state) { + case normal: + MAYBE_CONSUME_CHAR(c); + break; + case quoted: + if (c == quote_char) { + if (prev_c != '\\') { + state = normal; + } + } + prev_c = c; + nleft--; + *d++ = c; + break; + case escape: + if (!g_ascii_isxdigit(c)) { + if (i > escape_offset + 1) { + /* Try to decode an escape */ + const auto *escape_start = &sv[escape_offset + 1]; + unsigned long val; + + if (!rspamd_xstrtoul(escape_start, i - escape_offset - 1, &val)) { + msg_debug_css("invalid broken escape found at pos %d", + escape_offset); + } + else { + if (val < 0x80) { + /* Trivial case: ascii character */ + *d++ = 
(unsigned char) g_ascii_tolower(val); + nleft--; + } + else { + UChar32 uc = val; + auto off = 0; + UTF8_APPEND_CHAR_SAFE((uint8_t *) d, off, + sv.length(), u_tolower(uc)); + d += off; + nleft -= off; + } + } + } + else { + /* Empty escape, ignore it */ + msg_debug_css("invalid empty escape found at pos %d", + escape_offset); + } + + if (nleft <= 0) { + msg_err_css("cannot unescape css: truncated buffer of size %d", + (int) sv.length()); + } + else { + /* Escape is done, advance forward */ + if (g_ascii_isspace(c)) { + state = skip_spaces; + } + else { + MAYBE_CONSUME_CHAR(c); + } + } + } + break; + case skip_spaces: + if (!g_ascii_isspace(c)) { + MAYBE_CONSUME_CHAR(c); + } + /* Ignore spaces */ + break; + } + + i++; + } + + return std::string_view{nspace, sv.size() - nleft}; +} + +}// namespace rspamd::css + +/* C API */ +const gchar *rspamd_css_unescape(rspamd_mempool_t *pool, + const guchar *begin, + gsize len, + gsize *outlen) +{ + auto sv = rspamd::css::unescape_css(pool, {(const char *) begin, len}); + const auto *v = sv.begin(); + + if (outlen) { + *outlen = sv.size(); + } + + return v; +}
\ No newline at end of file diff --git a/src/libserver/css/css_util.hxx b/src/libserver/css/css_util.hxx new file mode 100644 index 0000000..4837a46 --- /dev/null +++ b/src/libserver/css/css_util.hxx @@ -0,0 +1,37 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_CSS_UTIL_HXX +#define RSPAMD_CSS_UTIL_HXX + +#include <string_view> +#include "mem_pool.h" + +namespace rspamd::css { + +/* + * Unescape css escapes + * \20AC : must be followed by a space if the next character is one of a-f, A-F, 0-9 + * \0020AC : must be 6 digits long, no space needed (but can be included) + */ +std::string_view unescape_css(rspamd_mempool_t *pool, + const std::string_view &sv); + +}// namespace rspamd::css + +#endif//RSPAMD_CSS_UTIL_HXX diff --git a/src/libserver/css/css_value.cxx b/src/libserver/css/css_value.cxx new file mode 100644 index 0000000..2546e01 --- /dev/null +++ b/src/libserver/css/css_value.cxx @@ -0,0 +1,449 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "css_value.hxx" +#include "css_colors_list.hxx" +#include "frozen/unordered_map.h" +#include "frozen/string.h" +#include "libutil/util.h" +#include "contrib/ankerl/unordered_dense.h" +#include "fmt/core.h" + +#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL +#include "doctest/doctest.h" + +/* Helper for unit test stringification */ +namespace doctest { +template<> +struct StringMaker<rspamd::css::css_color> { + static String convert(const rspamd::css::css_color &value) + { + return fmt::format("r={};g={};b={};alpha={}", + value.r, value.g, value.b, value.alpha) + .c_str(); + } +}; + +}// namespace doctest + +namespace rspamd::css { + +auto css_value::maybe_color_from_string(const std::string_view &input) + -> std::optional<css_value> +{ + + if (input.size() > 1 && input.front() == '#') { + return css_value::maybe_color_from_hex(input.substr(1)); + } + else { + auto found_it = css_colors_map.find(input); + + if (found_it != css_colors_map.end()) { + return css_value{found_it->second}; + } + } + + return std::nullopt; +} + +constexpr static inline auto hexpair_decode(char c1, char c2) -> std::uint8_t +{ + std::uint8_t ret = 0; + + if (c1 >= '0' && c1 <= '9') ret = c1 - '0'; + else if (c1 >= 'A' && c1 <= 'F') + ret = c1 - 'A' + 10; + else if (c1 >= 'a' && c1 <= 'f') + ret = c1 - 'a' + 10; + + ret *= 16; + + if (c2 >= '0' && c2 <= '9') ret += c2 - '0'; + else if (c2 >= 'A' && c2 <= 'F') + ret += c2 - 'A' + 10; + else if (c2 >= 'a' && c2 <= 'f') + ret += c2 - 'a' + 10; + + return ret; +} + +auto css_value::maybe_color_from_hex(const 
std::string_view &input) + -> std::optional<css_value> +{ + if (input.length() == 6) { + /* Plain RGB */ + css_color col(hexpair_decode(input[0], input[1]), + hexpair_decode(input[2], input[3]), + hexpair_decode(input[4], input[5])); + return css_value(col); + } + else if (input.length() == 3) { + /* Rgb as 3 hex digests */ + css_color col(hexpair_decode(input[0], input[0]), + hexpair_decode(input[1], input[1]), + hexpair_decode(input[2], input[2])); + return css_value(col); + } + else if (input.length() == 8) { + /* RGBA */ + css_color col(hexpair_decode(input[0], input[1]), + hexpair_decode(input[2], input[3]), + hexpair_decode(input[4], input[5]), + hexpair_decode(input[6], input[7])); + return css_value(col); + } + + return std::nullopt; +} + +constexpr static inline auto rgb_color_component_convert(const css_parser_token &tok) + -> std::uint8_t +{ + std::uint8_t ret = 0; + + if (tok.type == css_parser_token::token_type::number_token) { + auto dbl = std::get<float>(tok.value); + + if (tok.flags & css_parser_token::number_percent) { + if (dbl > 100) { + dbl = 100; + } + else if (dbl < 0) { + dbl = 0; + } + ret = (std::uint8_t)(dbl / 100.0 * 255.0); + } + else { + if (dbl > 255) { + dbl = 255; + } + else if (dbl < 0) { + dbl = 0; + } + + ret = (std::uint8_t)(dbl); + } + } + + return ret; +} + +constexpr static inline auto alpha_component_convert(const css_parser_token &tok) + -> std::uint8_t +{ + double ret = 1.0; + + if (tok.type == css_parser_token::token_type::number_token) { + auto dbl = std::get<float>(tok.value); + + if (tok.flags & css_parser_token::number_percent) { + if (dbl > 100) { + dbl = 100; + } + else if (dbl < 0) { + dbl = 0; + } + ret = (dbl / 100.0); + } + else { + if (dbl > 1.0) { + dbl = 1.0; + } + else if (dbl < 0) { + dbl = 0; + } + + ret = dbl; + } + } + + return (std::uint8_t)(ret * 255.0); +} + +constexpr static inline auto h_component_convert(const css_parser_token &tok) + -> double +{ + double ret = 0.0; + + if (tok.type == 
css_parser_token::token_type::number_token) { + auto dbl = std::get<float>(tok.value); + + if (tok.flags & css_parser_token::number_percent) { + if (dbl > 100) { + dbl = 100; + } + else if (dbl < 0) { + dbl = 0; + } + ret = (dbl / 100.0); + } + else { + dbl = ((((int) dbl % 360) + 360) % 360); /* Deal with rotations */ + ret = dbl / 360.0; /* Normalize to 0..1 */ + } + } + + return ret; +} + +constexpr static inline auto sl_component_convert(const css_parser_token &tok) + -> double +{ + double ret = 0.0; + + if (tok.type == css_parser_token::token_type::number_token) { + ret = tok.get_normal_number_or_default(ret); + } + + return ret; +} + +static inline auto hsl_to_rgb(double h, double s, double l) + -> css_color +{ + css_color ret; + + constexpr auto hue2rgb = [](auto p, auto q, auto t) -> auto { + if (t < 0.0) { + t += 1.0; + } + if (t > 1.0) { + t -= 1.0; + } + if (t * 6. < 1.0) { + return p + (q - p) * 6.0 * t; + } + if (t * 2. < 1) { + return q; + } + if (t * 3. < 2.) { + return p + (q - p) * (2.0 / 3.0 - t) * 6.0; + } + return p; + }; + + if (s == 0) { + /* Achromatic */ + ret.r = l; + ret.g = l; + ret.b = l; + } + else { + auto q = l <= 0.5 ? 
l * (1.0 + s) : l + s - l * s; + auto p = 2.0 * l - q; + ret.r = (std::uint8_t)(hue2rgb(p, q, h + 1.0 / 3.0) * 255); + ret.g = (std::uint8_t)(hue2rgb(p, q, h) * 255); + ret.b = (std::uint8_t)(hue2rgb(p, q, h - 1.0 / 3.0) * 255); + } + + ret.alpha = 255; + + return ret; +} + +auto css_value::maybe_color_from_function(const css_consumed_block::css_function_block &func) + -> std::optional<css_value> +{ + + if (func.as_string() == "rgb" && func.args.size() == 3) { + css_color col{rgb_color_component_convert(func.args[0]->get_token_or_empty()), + rgb_color_component_convert(func.args[1]->get_token_or_empty()), + rgb_color_component_convert(func.args[2]->get_token_or_empty())}; + + return css_value(col); + } + else if (func.as_string() == "rgba" && func.args.size() == 4) { + css_color col{rgb_color_component_convert(func.args[0]->get_token_or_empty()), + rgb_color_component_convert(func.args[1]->get_token_or_empty()), + rgb_color_component_convert(func.args[2]->get_token_or_empty()), + alpha_component_convert(func.args[3]->get_token_or_empty())}; + + return css_value(col); + } + else if (func.as_string() == "hsl" && func.args.size() == 3) { + auto h = h_component_convert(func.args[0]->get_token_or_empty()); + auto s = sl_component_convert(func.args[1]->get_token_or_empty()); + auto l = sl_component_convert(func.args[2]->get_token_or_empty()); + + auto col = hsl_to_rgb(h, s, l); + + return css_value(col); + } + else if (func.as_string() == "hsla" && func.args.size() == 4) { + auto h = h_component_convert(func.args[0]->get_token_or_empty()); + auto s = sl_component_convert(func.args[1]->get_token_or_empty()); + auto l = sl_component_convert(func.args[2]->get_token_or_empty()); + + auto col = hsl_to_rgb(h, s, l); + col.alpha = alpha_component_convert(func.args[3]->get_token_or_empty()); + + return css_value(col); + } + + return std::nullopt; +} + +auto css_value::maybe_dimension_from_number(const css_parser_token &tok) + -> std::optional<css_value> +{ + if 
(std::holds_alternative<float>(tok.value)) { + auto dbl = std::get<float>(tok.value); + css_dimension dim; + + dim.dim = dbl; + + if (tok.flags & css_parser_token::number_percent) { + dim.is_percent = true; + } + else { + dim.is_percent = false; + } + + return css_value{dim}; + } + + return std::nullopt; +} + +constexpr const auto display_names_map = frozen::make_unordered_map<frozen::string, css_display_value>({ + {"hidden", css_display_value::DISPLAY_HIDDEN}, + {"none", css_display_value::DISPLAY_HIDDEN}, + {"inline", css_display_value::DISPLAY_INLINE}, + {"block", css_display_value::DISPLAY_BLOCK}, + {"content", css_display_value::DISPLAY_INLINE}, + {"flex", css_display_value::DISPLAY_BLOCK}, + {"grid", css_display_value::DISPLAY_BLOCK}, + {"inline-block", css_display_value::DISPLAY_INLINE}, + {"inline-flex", css_display_value::DISPLAY_INLINE}, + {"inline-grid", css_display_value::DISPLAY_INLINE}, + {"inline-table", css_display_value::DISPLAY_INLINE}, + {"list-item", css_display_value::DISPLAY_BLOCK}, + {"run-in", css_display_value::DISPLAY_INLINE}, + {"table", css_display_value::DISPLAY_BLOCK}, + {"table-caption", css_display_value::DISPLAY_TABLE_ROW}, + {"table-column-group", css_display_value::DISPLAY_TABLE_ROW}, + {"table-header-group", css_display_value::DISPLAY_TABLE_ROW}, + {"table-footer-group", css_display_value::DISPLAY_TABLE_ROW}, + {"table-row-group", css_display_value::DISPLAY_TABLE_ROW}, + {"table-cell", css_display_value::DISPLAY_TABLE_ROW}, + {"table-column", css_display_value::DISPLAY_TABLE_ROW}, + {"table-row", css_display_value::DISPLAY_TABLE_ROW}, + {"initial", css_display_value::DISPLAY_INLINE}, +}); + +auto css_value::maybe_display_from_string(const std::string_view &input) + -> std::optional<css_value> +{ + auto f = display_names_map.find(input); + + if (f != display_names_map.end()) { + return css_value{f->second}; + } + + return std::nullopt; +} + + +auto css_value::debug_str() const -> std::string +{ + std::string ret; + + 
std::visit([&](const auto &arg) { + using T = std::decay_t<decltype(arg)>; + + if constexpr (std::is_same_v<T, css_color>) { + ret += fmt::format("color: r={};g={};b={};alpha={}", + arg.r, arg.g, arg.b, arg.alpha); + } + else if constexpr (std::is_same_v<T, double>) { + ret += "size: " + std::to_string(arg); + } + else if constexpr (std::is_same_v<T, css_dimension>) { + ret += "dimension: " + std::to_string(arg.dim); + if (arg.is_percent) { + ret += "%"; + } + } + else if constexpr (std::is_same_v<T, css_display_value>) { + ret += "display: "; + switch (arg) { + case css_display_value::DISPLAY_HIDDEN: + ret += "hidden"; + break; + case css_display_value::DISPLAY_BLOCK: + ret += "block"; + break; + case css_display_value::DISPLAY_INLINE: + ret += "inline"; + break; + case css_display_value::DISPLAY_TABLE_ROW: + ret += "table_row"; + break; + } + } + else if constexpr (std::is_integral_v<T>) { + ret += "integral: " + std::to_string(static_cast<int>(arg)); + } + else { + ret += "nyi"; + } + }, + value); + + return ret; +} + +TEST_SUITE("css"){ + TEST_CASE("css hex colors"){ + const std::pair<const char *, css_color> hex_tests[] = { + {"000", css_color(0, 0, 0)}, + {"000000", css_color(0, 0, 0)}, + {"f00", css_color(255, 0, 0)}, + {"FEDCBA", css_color(254, 220, 186)}, + {"234", css_color(34, 51, 68)}, + }; + +for (const auto &p: hex_tests) { + SUBCASE((std::string("parse hex color: ") + p.first).c_str()) + { + auto col_parsed = css_value::maybe_color_from_hex(p.first); + //CHECK_UNARY(col_parsed); + //CHECK_UNARY(col_parsed.value().to_color()); + auto final_col = col_parsed.value().to_color().value(); + CHECK(final_col == p.second); + } +} +}// namespace rspamd::css +TEST_CASE("css colors strings") +{ + auto passed = 0; + for (const auto &p: css_colors_map) { + /* Match some of the colors selected randomly */ + if (rspamd_random_double_fast() > 0.9) { + auto col_parsed = css_value::maybe_color_from_string(p.first); + auto final_col = 
col_parsed.value().to_color().value(); + CHECK_MESSAGE(final_col == p.second, p.first.data()); + passed++; + + if (passed > 20) { + break; + } + } + } +} +} +; +} diff --git a/src/libserver/css/css_value.hxx b/src/libserver/css/css_value.hxx new file mode 100644 index 0000000..1d57421 --- /dev/null +++ b/src/libserver/css/css_value.hxx @@ -0,0 +1,174 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef RSPAMD_CSS_VALUE_HXX +#define RSPAMD_CSS_VALUE_HXX + +#include <string> +#include <variant> +#include <optional> +#include <vector> +#include <iosfwd> +#include "parse_error.hxx" +#include "css_parser.hxx" +#include "contrib/expected/expected.hpp" + +namespace rspamd::css { + +struct alignas(int) css_color { + std::uint8_t r; + std::uint8_t g; + std::uint8_t b; + + std::uint8_t alpha; + + css_color(std::uint8_t _r, std::uint8_t _g, std::uint8_t _b, std::uint8_t _alpha = 255) + : r(_r), g(_g), b(_b), alpha(_alpha) + { + } + css_color() = default; + constexpr auto to_number() const -> std::uint32_t + { + return (std::uint32_t) alpha << 24 | + (std::uint32_t) r << 16 | + (std::uint32_t) g << 8 | + (std::uint32_t) b << 0; + } + + constexpr auto to_rgb() const -> std::uint32_t + { + return (std::uint32_t) r << 16 | + (std::uint32_t) g << 8 | + (std::uint32_t) b << 0; + } + friend bool operator==(const css_color &l, const css_color &r) + { + return (memcmp(&l, &r, sizeof(css_color)) == 0); + } + 
+ static auto white() -> css_color + { + return css_color{255, 255, 255}; + } + static auto black() -> css_color + { + return css_color{0, 0, 0}; + } +}; + +struct css_dimension { + float dim; + bool is_percent; +}; + +/* + * Simple enum class for display stuff + */ +enum class css_display_value : std::uint8_t { + DISPLAY_INLINE, + DISPLAY_BLOCK, + DISPLAY_TABLE_ROW, + DISPLAY_HIDDEN +}; + +/* + * Value handler, uses std::variant instead of polymorphic classes for now + * for simplicity + */ +struct css_value { + std::variant<css_color, + float, + css_display_value, + css_dimension, + std::monostate> + value; + + css_value() + { + } + css_value(const css_color &color) + : value(color) + { + } + css_value(float num) + : value(num) + { + } + css_value(css_dimension dim) + : value(dim) + { + } + css_value(css_display_value d) + : value(d) + { + } + + auto to_color(void) const -> std::optional<css_color> + { + return extract_value_maybe<css_color>(); + } + + auto to_number(void) const -> std::optional<float> + { + return extract_value_maybe<float>(); + } + + auto to_dimension(void) const -> std::optional<css_dimension> + { + return extract_value_maybe<css_dimension>(); + } + + auto to_display(void) const -> std::optional<css_display_value> + { + return extract_value_maybe<css_display_value>(); + } + + auto is_valid(void) const -> bool + { + return !(std::holds_alternative<std::monostate>(value)); + } + + auto debug_str() const -> std::string; + + static auto maybe_color_from_string(const std::string_view &input) + -> std::optional<css_value>; + static auto maybe_color_from_hex(const std::string_view &input) + -> std::optional<css_value>; + static auto maybe_color_from_function(const css_consumed_block::css_function_block &func) + -> std::optional<css_value>; + static auto maybe_dimension_from_number(const css_parser_token &tok) + -> std::optional<css_value>; + static auto maybe_display_from_string(const std::string_view &input) + -> std::optional<css_value>; + 
+private: + template<typename T> + auto extract_value_maybe(void) const -> std::optional<T> + { + if (std::holds_alternative<T>(value)) { + return std::get<T>(value); + } + + return std::nullopt; + } +}; + +}// namespace rspamd::css + + +#endif//RSPAMD_CSS_VALUE_HXX diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx new file mode 100644 index 0000000..22b76f0 --- /dev/null +++ b/src/libserver/css/parse_error.hxx @@ -0,0 +1,61 @@ +/*- + * Copyright 2021 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#ifndef RSPAMD_PARSE_ERROR_HXX +#define RSPAMD_PARSE_ERROR_HXX + +#include <string> +#include <optional> + +namespace rspamd::css { + +/* + * Generic parser errors + */ +enum class css_parse_error_type { + PARSE_ERROR_UNKNOWN_OPTION, + PARSE_ERROR_INVALID_SYNTAX, + PARSE_ERROR_BAD_NESTING, + PARSE_ERROR_NYI, + PARSE_ERROR_UNKNOWN_ERROR, + /* All above is treated as fatal error in parsing */ + PARSE_ERROR_NO_ERROR, + PARSE_ERROR_EMPTY, +}; + +struct css_parse_error { + css_parse_error_type type = css_parse_error_type::PARSE_ERROR_UNKNOWN_ERROR; + std::optional<std::string> description; + + explicit css_parse_error(css_parse_error_type type, const std::string &description) + : type(type), description(description) + { + } + explicit css_parse_error(css_parse_error_type type = css_parse_error_type::PARSE_ERROR_NO_ERROR) + : type(type) + { + } + + constexpr auto is_fatal(void) const -> bool + { + return type < css_parse_error_type::PARSE_ERROR_NO_ERROR; + } +}; + +}// namespace rspamd::css +#endif//RSPAMD_PARSE_ERROR_HXX |