summaryrefslogtreecommitdiffstats
path: root/src/libserver/css
diff options
context:
space:
mode:
Diffstat (limited to 'src/libserver/css')
-rw-r--r--src/libserver/css/CMakeLists.txt9
-rw-r--r--src/libserver/css/css.cxx227
-rw-r--r--src/libserver/css/css.hxx68
-rw-r--r--src/libserver/css/css_colors_list.hxx738
-rw-r--r--src/libserver/css/css_parser.cxx892
-rw-r--r--src/libserver/css/css_parser.hxx244
-rw-r--r--src/libserver/css/css_property.cxx69
-rw-r--r--src/libserver/css/css_property.hxx172
-rw-r--r--src/libserver/css/css_rule.cxx531
-rw-r--r--src/libserver/css/css_rule.hxx153
-rw-r--r--src/libserver/css/css_rule_parser.rl27
-rw-r--r--src/libserver/css/css_selector.cxx226
-rw-r--r--src/libserver/css/css_selector.hxx134
-rw-r--r--src/libserver/css/css_selector_parser.rl27
-rw-r--r--src/libserver/css/css_style.hxx66
-rw-r--r--src/libserver/css/css_syntax.rl110
-rw-r--r--src/libserver/css/css_tokeniser.cxx836
-rw-r--r--src/libserver/css/css_tokeniser.hxx215
-rw-r--r--src/libserver/css/css_util.cxx157
-rw-r--r--src/libserver/css/css_util.hxx37
-rw-r--r--src/libserver/css/css_value.cxx449
-rw-r--r--src/libserver/css/css_value.hxx174
-rw-r--r--src/libserver/css/parse_error.hxx61
23 files changed, 5622 insertions, 0 deletions
diff --git a/src/libserver/css/CMakeLists.txt b/src/libserver/css/CMakeLists.txt
new file mode 100644
index 0000000..c0c9d51
--- /dev/null
+++ b/src/libserver/css/CMakeLists.txt
@@ -0,0 +1,9 @@
+SET(LIBCSSSRC "${CMAKE_CURRENT_SOURCE_DIR}/css.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_property.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_value.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_selector.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_tokeniser.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_util.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_rule.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_parser.cxx"
+ PARENT_SCOPE) \ No newline at end of file
diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx
new file mode 100644
index 0000000..1b369ed
--- /dev/null
+++ b/src/libserver/css/css.cxx
@@ -0,0 +1,227 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css.hxx"
+#include "contrib/ankerl/unordered_dense.h"
+#include "css_parser.hxx"
+#include "libserver/html/html_tag.hxx"
+#include "libserver/html/html_block.hxx"
+
+/* Keep unit tests implementation here (it'll possibly be moved outside one day) */
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#define DOCTEST_CONFIG_IMPLEMENT
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+INIT_LOG_MODULE_PUBLIC(css);
+
+class css_style_sheet::impl {
+public:
+ using sel_shared_hash = smart_ptr_hash<css_selector>;
+ using sel_shared_eq = smart_ptr_equal<css_selector>;
+ using selector_ptr = std::unique_ptr<css_selector>;
+ using selectors_hash = ankerl::unordered_dense::map<selector_ptr, css_declarations_block_ptr,
+ sel_shared_hash, sel_shared_eq>;
+ using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>;
+ selectors_hash tags_selector;
+ selectors_hash class_selectors;
+ selectors_hash id_selectors;
+ std::optional<universal_selector_t> universal_selector;
+};
+
+css_style_sheet::css_style_sheet(rspamd_mempool_t *pool)
+ : pool(pool), pimpl(new impl)
+{
+}
+css_style_sheet::~css_style_sheet()
+{
+}
+
+auto css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector,
+ css_declarations_block_ptr decls) -> void
+{
+ impl::selectors_hash *target_hash = nullptr;
+
+ switch (selector->type) {
+ case css_selector::selector_type::SELECTOR_ALL:
+ if (pimpl->universal_selector) {
+ /* Another universal selector */
+ msg_debug_css("redefined universal selector, merging rules");
+ pimpl->universal_selector->second->merge_block(*decls);
+ }
+ else {
+ msg_debug_css("added universal selector");
+ pimpl->universal_selector = std::make_pair(std::move(selector),
+ decls);
+ }
+ break;
+ case css_selector::selector_type::SELECTOR_CLASS:
+ target_hash = &pimpl->class_selectors;
+ break;
+ case css_selector::selector_type::SELECTOR_ID:
+ target_hash = &pimpl->id_selectors;
+ break;
+ case css_selector::selector_type::SELECTOR_TAG:
+ target_hash = &pimpl->tags_selector;
+ break;
+ }
+
+ if (target_hash) {
+ auto found_it = target_hash->find(selector);
+
+ if (found_it == target_hash->end()) {
+ /* Easy case, new element */
+ target_hash->insert({std::move(selector), decls});
+ }
+ else {
+ /* The problem with merging is actually in how to handle selectors chains
+ * For example, we have 2 selectors:
+ * 1. class id tag -> meaning that we first match class, then we ensure that
+ * id is also the same and finally we check the tag
+ * 2. tag class id -> it means that we check first tag, then class and then id
+ * So we have somehow equal path in the xpath terms.
+ * I suppose now, that we merely check parent stuff and handle duplicates
+ * merging when finally resolving paths.
+ */
+ auto sel_str = selector->to_string().value_or("unknown");
+ msg_debug_css("found duplicate selector: %*s", (int) sel_str.size(),
+ sel_str.data());
+ found_it->second->merge_block(*decls);
+ }
+ }
+}
+
+auto css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block *
+{
+ std::optional<std::string_view> id_comp, class_comp;
+ rspamd::html::html_block *res = nullptr;
+
+ if (!tag) {
+ return nullptr;
+ }
+
+ /* First, find id in a tag and a class */
+ for (const auto &param: tag->components) {
+ if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) {
+ id_comp = param.value;
+ }
+ else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
+ class_comp = param.value;
+ }
+ }
+
+ /* ID part */
+ if (id_comp && !pimpl->id_selectors.empty()) {
+ auto found_id_sel = pimpl->id_selectors.find(css_selector{id_comp.value()});
+
+ if (found_id_sel != pimpl->id_selectors.end()) {
+ const auto &decl = *(found_id_sel->second);
+ res = decl.compile_to_block(pool);
+ }
+ }
+
+ /* Class part */
+ if (class_comp && !pimpl->class_selectors.empty()) {
+ auto sv_split = [](auto strv, std::string_view delims = " ") -> std::vector<std::string_view> {
+ std::vector<decltype(strv)> ret;
+ std::size_t start = 0;
+
+ while (start < strv.size()) {
+ const auto last = strv.find_first_of(delims, start);
+ if (start != last) {
+ ret.emplace_back(strv.substr(start, last - start));
+ }
+
+ if (last == std::string_view::npos) {
+ break;
+ }
+
+ start = last + 1;
+ }
+
+ return ret;
+ };
+
+ auto elts = sv_split(class_comp.value());
+
+ for (const auto &e: elts) {
+ auto found_class_sel = pimpl->class_selectors.find(
+ css_selector{e, css_selector::selector_type::SELECTOR_CLASS});
+
+ if (found_class_sel != pimpl->class_selectors.end()) {
+ const auto &decl = *(found_class_sel->second);
+ auto *tmp = decl.compile_to_block(pool);
+
+ if (res == nullptr) {
+ res = tmp;
+ }
+ else {
+ res->propagate_block(*tmp);
+ }
+ }
+ }
+ }
+
+ /* Tags part */
+ if (!pimpl->tags_selector.empty()) {
+ auto found_tag_sel = pimpl->tags_selector.find(
+ css_selector{static_cast<tag_id_t>(tag->id)});
+
+ if (found_tag_sel != pimpl->tags_selector.end()) {
+ const auto &decl = *(found_tag_sel->second);
+ auto *tmp = decl.compile_to_block(pool);
+
+ if (res == nullptr) {
+ res = tmp;
+ }
+ else {
+ res->propagate_block(*tmp);
+ }
+ }
+ }
+
+ /* Finally, universal selector */
+ if (pimpl->universal_selector) {
+ auto *tmp = pimpl->universal_selector->second->compile_to_block(pool);
+
+ if (res == nullptr) {
+ res = tmp;
+ }
+ else {
+ res->propagate_block(*tmp);
+ }
+ }
+
+ return res;
+}
+
+auto css_parse_style(rspamd_mempool_t *pool,
+ std::string_view input,
+ std::shared_ptr<css_style_sheet> &&existing)
+ -> css_return_pair
+{
+ auto parse_res = rspamd::css::parse_css(pool, input,
+ std::forward<std::shared_ptr<css_style_sheet>>(existing));
+
+ if (parse_res.has_value()) {
+ return std::make_pair(parse_res.value(), css_parse_error());
+ }
+
+ return std::make_pair(nullptr, parse_res.error());
+}
+
+}// namespace rspamd::css \ No newline at end of file
diff --git a/src/libserver/css/css.hxx b/src/libserver/css/css.hxx
new file mode 100644
index 0000000..f0f8120
--- /dev/null
+++ b/src/libserver/css/css.hxx
@@ -0,0 +1,68 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef RSPAMD_CSS_HXX
+#define RSPAMD_CSS_HXX
+
+#include <string>
+#include <memory>
+#include "logger.h"
+#include "css_rule.hxx"
+#include "css_selector.hxx"
+
+namespace rspamd::html {
+/* Forward declaration */
+struct html_tag;
+struct html_block;
+}// namespace rspamd::html
+
+namespace rspamd::css {
+
+extern int rspamd_css_log_id;
+
+#define msg_debug_css(...) rspamd_conditional_debug_fast(NULL, NULL, \
+ rspamd_css_log_id, "css", pool->tag.uid, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+#define msg_err_css(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
+ "css", pool->tag.uid, \
+ __FUNCTION__, \
+ __VA_ARGS__)
+
+class css_style_sheet {
+public:
+ css_style_sheet(rspamd_mempool_t *pool);
+ ~css_style_sheet(); /* must be declared separately due to pimpl */
+ auto add_selector_rule(std::unique_ptr<css_selector> &&selector,
+ css_declarations_block_ptr decls) -> void;
+
+ auto check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block *;
+
+private:
+ class impl;
+ rspamd_mempool_t *pool;
+ std::unique_ptr<impl> pimpl;
+};
+
+using css_return_pair = std::pair<std::shared_ptr<css_style_sheet>, css_parse_error>;
+auto css_parse_style(rspamd_mempool_t *pool,
+ std::string_view input,
+ std::shared_ptr<css_style_sheet> &&existing) -> css_return_pair;
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_H \ No newline at end of file
diff --git a/src/libserver/css/css_colors_list.hxx b/src/libserver/css/css_colors_list.hxx
new file mode 100644
index 0000000..6dfe54f
--- /dev/null
+++ b/src/libserver/css/css_colors_list.hxx
@@ -0,0 +1,738 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_CSS_COLORS_LIST_HXX
+#define RSPAMD_CSS_COLORS_LIST_HXX
+
+#pragma once
+
+#include <string_view>
+#include "contrib/ankerl/unordered_dense.h"
+#include "css_value.hxx"
+
+namespace rspamd::css {
+
+/*
+ * List of all colors, intended to use with hashes/sets
+ * TODO: think about frozen structs when we can deal with 700 values without
+ * compiler limits...
+ */
+static const ankerl::unordered_dense::map<std::string_view, css_color> css_colors_map{
+ {"aliceblue", {240, 248, 255}},
+ {"antiquewhite", {250, 235, 215}},
+ {"antiquewhite1", {255, 239, 219}},
+ {"antiquewhite2", {238, 223, 204}},
+ {"antiquewhite3", {205, 192, 176}},
+ {"antiquewhite4", {139, 131, 120}},
+ {"aqua", {0, 255, 255}},
+ {"aquamarine", {127, 255, 212}},
+ {"aquamarine1", {127, 255, 212}},
+ {"aquamarine2", {118, 238, 198}},
+ {"aquamarine3", {102, 205, 170}},
+ {"aquamarine4", {69, 139, 116}},
+ {"azure", {240, 255, 255}},
+ {"azure1", {240, 255, 255}},
+ {"azure2", {224, 238, 238}},
+ {"azure3", {193, 205, 205}},
+ {"azure4", {131, 139, 139}},
+ {"beige", {245, 245, 220}},
+ {"bisque", {255, 228, 196}},
+ {"bisque1", {255, 228, 196}},
+ {"bisque2", {238, 213, 183}},
+ {"bisque3", {205, 183, 158}},
+ {"bisque4", {139, 125, 107}},
+ {"black", {0, 0, 0}},
+ {"blanchedalmond", {255, 235, 205}},
+ {"blue", {0, 0, 255}},
+ {"blue1", {0, 0, 255}},
+ {"blue2", {0, 0, 238}},
+ {"blue3", {0, 0, 205}},
+ {"blue4", {0, 0, 139}},
+ {"blueviolet", {138, 43, 226}},
+ {"brown", {165, 42, 42}},
+ {"brown1", {255, 64, 64}},
+ {"brown2", {238, 59, 59}},
+ {"brown3", {205, 51, 51}},
+ {"brown4", {139, 35, 35}},
+ {"burlywood", {222, 184, 135}},
+ {"burlywood1", {255, 211, 155}},
+ {"burlywood2", {238, 197, 145}},
+ {"burlywood3", {205, 170, 125}},
+ {"burlywood4", {139, 115, 85}},
+ {"cadetblue", {95, 158, 160}},
+ {"cadetblue1", {152, 245, 255}},
+ {"cadetblue2", {142, 229, 238}},
+ {"cadetblue3", {122, 197, 205}},
+ {"cadetblue4", {83, 134, 139}},
+ {"chartreuse", {127, 255, 0}},
+ {"chartreuse1", {127, 255, 0}},
+ {"chartreuse2", {118, 238, 0}},
+ {"chartreuse3", {102, 205, 0}},
+ {"chartreuse4", {69, 139, 0}},
+ {"chocolate", {210, 105, 30}},
+ {"chocolate1", {255, 127, 36}},
+ {"chocolate2", {238, 118, 33}},
+ {"chocolate3", {205, 102, 29}},
+ {"chocolate4", {139, 69, 19}},
+ {"coral", {255, 127, 80}},
+ {"coral1", {255, 114, 86}},
+ {"coral2", {238, 106, 80}},
+ {"coral3", {205, 91, 69}},
+ {"coral4", {139, 62, 47}},
+ {"cornflowerblue", {100, 149, 237}},
+ {"cornsilk", {255, 248, 220}},
+ {"cornsilk1", {255, 248, 220}},
+ {"cornsilk2", {238, 232, 205}},
+ {"cornsilk3", {205, 200, 177}},
+ {"cornsilk4", {139, 136, 120}},
+ {"crimson", {220, 20, 60}},
+ {"cyan", {0, 255, 255}},
+ {"cyan1", {0, 255, 255}},
+ {"cyan2", {0, 238, 238}},
+ {"cyan3", {0, 205, 205}},
+ {"cyan4", {0, 139, 139}},
+ {"darkblue", {0, 0, 139}},
+ {"darkcyan", {0, 139, 139}},
+ {"darkgoldenrod", {184, 134, 11}},
+ {"darkgoldenrod1", {255, 185, 15}},
+ {"darkgoldenrod2", {238, 173, 14}},
+ {"darkgoldenrod3", {205, 149, 12}},
+ {"darkgoldenrod4", {139, 101, 8}},
+ {"darkgray", {169, 169, 169}},
+ {"darkgreen", {0, 100, 0}},
+ {"darkgrey", {169, 169, 169}},
+ {"darkkhaki", {189, 183, 107}},
+ {"darkmagenta", {139, 0, 139}},
+ {"darkolivegreen", {85, 107, 47}},
+ {"darkolivegreen1", {202, 255, 112}},
+ {"darkolivegreen2", {188, 238, 104}},
+ {"darkolivegreen3", {162, 205, 90}},
+ {"darkolivegreen4", {110, 139, 61}},
+ {"darkorange", {255, 140, 0}},
+ {"darkorange1", {255, 127, 0}},
+ {"darkorange2", {238, 118, 0}},
+ {"darkorange3", {205, 102, 0}},
+ {"darkorange4", {139, 69, 0}},
+ {"darkorchid", {153, 50, 204}},
+ {"darkorchid1", {191, 62, 255}},
+ {"darkorchid2", {178, 58, 238}},
+ {"darkorchid3", {154, 50, 205}},
+ {"darkorchid4", {104, 34, 139}},
+ {"darkred", {139, 0, 0}},
+ {"darksalmon", {233, 150, 122}},
+ {"darkseagreen", {143, 188, 143}},
+ {"darkseagreen1", {193, 255, 193}},
+ {"darkseagreen2", {180, 238, 180}},
+ {"darkseagreen3", {155, 205, 155}},
+ {"darkseagreen4", {105, 139, 105}},
+ {"darkslateblue", {72, 61, 139}},
+ {"darkslategray", {47, 79, 79}},
+ {"darkslategray1", {151, 255, 255}},
+ {"darkslategray2", {141, 238, 238}},
+ {"darkslategray3", {121, 205, 205}},
+ {"darkslategray4", {82, 139, 139}},
+ {"darkslategrey", {47, 79, 79}},
+ {"darkturquoise", {0, 206, 209}},
+ {"darkviolet", {148, 0, 211}},
+ {"deeppink", {255, 20, 147}},
+ {"deeppink1", {255, 20, 147}},
+ {"deeppink2", {238, 18, 137}},
+ {"deeppink3", {205, 16, 118}},
+ {"deeppink4", {139, 10, 80}},
+ {"deepskyblue", {0, 191, 255}},
+ {"deepskyblue1", {0, 191, 255}},
+ {"deepskyblue2", {0, 178, 238}},
+ {"deepskyblue3", {0, 154, 205}},
+ {"deepskyblue4", {0, 104, 139}},
+ {"dimgray", {105, 105, 105}},
+ {"dimgrey", {105, 105, 105}},
+ {"dodgerblue", {30, 144, 255}},
+ {"dodgerblue1", {30, 144, 255}},
+ {"dodgerblue2", {28, 134, 238}},
+ {"dodgerblue3", {24, 116, 205}},
+ {"dodgerblue4", {16, 78, 139}},
+ {"firebrick", {178, 34, 34}},
+ {"firebrick1", {255, 48, 48}},
+ {"firebrick2", {238, 44, 44}},
+ {"firebrick3", {205, 38, 38}},
+ {"firebrick4", {139, 26, 26}},
+ {"floralwhite", {255, 250, 240}},
+ {"forestgreen", {34, 139, 34}},
+ {"fuchsia", {255, 0, 255}},
+ {"gainsboro", {220, 220, 220}},
+ {"ghostwhite", {248, 248, 255}},
+ {"gold", {255, 215, 0}},
+ {"gold1", {255, 215, 0}},
+ {"gold2", {238, 201, 0}},
+ {"gold3", {205, 173, 0}},
+ {"gold4", {139, 117, 0}},
+ {"goldenrod", {218, 165, 32}},
+ {"goldenrod1", {255, 193, 37}},
+ {"goldenrod2", {238, 180, 34}},
+ {"goldenrod3", {205, 155, 29}},
+ {"goldenrod4", {139, 105, 20}},
+ {"gray", {190, 190, 190}},
+ {"gray0", {0, 0, 0}},
+ {"gray1", {3, 3, 3}},
+ {"gray10", {26, 26, 26}},
+ {"gray100", {255, 255, 255}},
+ {"gray11", {28, 28, 28}},
+ {"gray12", {31, 31, 31}},
+ {"gray13", {33, 33, 33}},
+ {"gray14", {36, 36, 36}},
+ {"gray15", {38, 38, 38}},
+ {"gray16", {41, 41, 41}},
+ {"gray17", {43, 43, 43}},
+ {"gray18", {46, 46, 46}},
+ {"gray19", {48, 48, 48}},
+ {"gray2", {5, 5, 5}},
+ {"gray20", {51, 51, 51}},
+ {"gray21", {54, 54, 54}},
+ {"gray22", {56, 56, 56}},
+ {"gray23", {59, 59, 59}},
+ {"gray24", {61, 61, 61}},
+ {"gray25", {64, 64, 64}},
+ {"gray26", {66, 66, 66}},
+ {"gray27", {69, 69, 69}},
+ {"gray28", {71, 71, 71}},
+ {"gray29", {74, 74, 74}},
+ {"gray3", {8, 8, 8}},
+ {"gray30", {77, 77, 77}},
+ {"gray31", {79, 79, 79}},
+ {"gray32", {82, 82, 82}},
+ {"gray33", {84, 84, 84}},
+ {"gray34", {87, 87, 87}},
+ {"gray35", {89, 89, 89}},
+ {"gray36", {92, 92, 92}},
+ {"gray37", {94, 94, 94}},
+ {"gray38", {97, 97, 97}},
+ {"gray39", {99, 99, 99}},
+ {"gray4", {10, 10, 10}},
+ {"gray40", {102, 102, 102}},
+ {"gray41", {105, 105, 105}},
+ {"gray42", {107, 107, 107}},
+ {"gray43", {110, 110, 110}},
+ {"gray44", {112, 112, 112}},
+ {"gray45", {115, 115, 115}},
+ {"gray46", {117, 117, 117}},
+ {"gray47", {120, 120, 120}},
+ {"gray48", {122, 122, 122}},
+ {"gray49", {125, 125, 125}},
+ {"gray5", {13, 13, 13}},
+ {"gray50", {127, 127, 127}},
+ {"gray51", {130, 130, 130}},
+ {"gray52", {133, 133, 133}},
+ {"gray53", {135, 135, 135}},
+ {"gray54", {138, 138, 138}},
+ {"gray55", {140, 140, 140}},
+ {"gray56", {143, 143, 143}},
+ {"gray57", {145, 145, 145}},
+ {"gray58", {148, 148, 148}},
+ {"gray59", {150, 150, 150}},
+ {"gray6", {15, 15, 15}},
+ {"gray60", {153, 153, 153}},
+ {"gray61", {156, 156, 156}},
+ {"gray62", {158, 158, 158}},
+ {"gray63", {161, 161, 161}},
+ {"gray64", {163, 163, 163}},
+ {"gray65", {166, 166, 166}},
+ {"gray66", {168, 168, 168}},
+ {"gray67", {171, 171, 171}},
+ {"gray68", {173, 173, 173}},
+ {"gray69", {176, 176, 176}},
+ {"gray7", {18, 18, 18}},
+ {"gray70", {179, 179, 179}},
+ {"gray71", {181, 181, 181}},
+ {"gray72", {184, 184, 184}},
+ {"gray73", {186, 186, 186}},
+ {"gray74", {189, 189, 189}},
+ {"gray75", {191, 191, 191}},
+ {"gray76", {194, 194, 194}},
+ {"gray77", {196, 196, 196}},
+ {"gray78", {199, 199, 199}},
+ {"gray79", {201, 201, 201}},
+ {"gray8", {20, 20, 20}},
+ {"gray80", {204, 204, 204}},
+ {"gray81", {207, 207, 207}},
+ {"gray82", {209, 209, 209}},
+ {"gray83", {212, 212, 212}},
+ {"gray84", {214, 214, 214}},
+ {"gray85", {217, 217, 217}},
+ {"gray86", {219, 219, 219}},
+ {"gray87", {222, 222, 222}},
+ {"gray88", {224, 224, 224}},
+ {"gray89", {227, 227, 227}},
+ {"gray9", {23, 23, 23}},
+ {"gray90", {229, 229, 229}},
+ {"gray91", {232, 232, 232}},
+ {"gray92", {235, 235, 235}},
+ {"gray93", {237, 237, 237}},
+ {"gray94", {240, 240, 240}},
+ {"gray95", {242, 242, 242}},
+ {"gray96", {245, 245, 245}},
+ {"gray97", {247, 247, 247}},
+ {"gray98", {250, 250, 250}},
+ {"gray99", {252, 252, 252}},
+ {"green", {0, 255, 0}},
+ {"green1", {0, 255, 0}},
+ {"green2", {0, 238, 0}},
+ {"green3", {0, 205, 0}},
+ {"green4", {0, 139, 0}},
+ {"greenyellow", {173, 255, 47}},
+ {"grey", {190, 190, 190}},
+ {"grey0", {0, 0, 0}},
+ {"grey1", {3, 3, 3}},
+ {"grey10", {26, 26, 26}},
+ {"grey100", {255, 255, 255}},
+ {"grey11", {28, 28, 28}},
+ {"grey12", {31, 31, 31}},
+ {"grey13", {33, 33, 33}},
+ {"grey14", {36, 36, 36}},
+ {"grey15", {38, 38, 38}},
+ {"grey16", {41, 41, 41}},
+ {"grey17", {43, 43, 43}},
+ {"grey18", {46, 46, 46}},
+ {"grey19", {48, 48, 48}},
+ {"grey2", {5, 5, 5}},
+ {"grey20", {51, 51, 51}},
+ {"grey21", {54, 54, 54}},
+ {"grey22", {56, 56, 56}},
+ {"grey23", {59, 59, 59}},
+ {"grey24", {61, 61, 61}},
+ {"grey25", {64, 64, 64}},
+ {"grey26", {66, 66, 66}},
+ {"grey27", {69, 69, 69}},
+ {"grey28", {71, 71, 71}},
+ {"grey29", {74, 74, 74}},
+ {"grey3", {8, 8, 8}},
+ {"grey30", {77, 77, 77}},
+ {"grey31", {79, 79, 79}},
+ {"grey32", {82, 82, 82}},
+ {"grey33", {84, 84, 84}},
+ {"grey34", {87, 87, 87}},
+ {"grey35", {89, 89, 89}},
+ {"grey36", {92, 92, 92}},
+ {"grey37", {94, 94, 94}},
+ {"grey38", {97, 97, 97}},
+ {"grey39", {99, 99, 99}},
+ {"grey4", {10, 10, 10}},
+ {"grey40", {102, 102, 102}},
+ {"grey41", {105, 105, 105}},
+ {"grey42", {107, 107, 107}},
+ {"grey43", {110, 110, 110}},
+ {"grey44", {112, 112, 112}},
+ {"grey45", {115, 115, 115}},
+ {"grey46", {117, 117, 117}},
+ {"grey47", {120, 120, 120}},
+ {"grey48", {122, 122, 122}},
+ {"grey49", {125, 125, 125}},
+ {"grey5", {13, 13, 13}},
+ {"grey50", {127, 127, 127}},
+ {"grey51", {130, 130, 130}},
+ {"grey52", {133, 133, 133}},
+ {"grey53", {135, 135, 135}},
+ {"grey54", {138, 138, 138}},
+ {"grey55", {140, 140, 140}},
+ {"grey56", {143, 143, 143}},
+ {"grey57", {145, 145, 145}},
+ {"grey58", {148, 148, 148}},
+ {"grey59", {150, 150, 150}},
+ {"grey6", {15, 15, 15}},
+ {"grey60", {153, 153, 153}},
+ {"grey61", {156, 156, 156}},
+ {"grey62", {158, 158, 158}},
+ {"grey63", {161, 161, 161}},
+ {"grey64", {163, 163, 163}},
+ {"grey65", {166, 166, 166}},
+ {"grey66", {168, 168, 168}},
+ {"grey67", {171, 171, 171}},
+ {"grey68", {173, 173, 173}},
+ {"grey69", {176, 176, 176}},
+ {"grey7", {18, 18, 18}},
+ {"grey70", {179, 179, 179}},
+ {"grey71", {181, 181, 181}},
+ {"grey72", {184, 184, 184}},
+ {"grey73", {186, 186, 186}},
+ {"grey74", {189, 189, 189}},
+ {"grey75", {191, 191, 191}},
+ {"grey76", {194, 194, 194}},
+ {"grey77", {196, 196, 196}},
+ {"grey78", {199, 199, 199}},
+ {"grey79", {201, 201, 201}},
+ {"grey8", {20, 20, 20}},
+ {"grey80", {204, 204, 204}},
+ {"grey81", {207, 207, 207}},
+ {"grey82", {209, 209, 209}},
+ {"grey83", {212, 212, 212}},
+ {"grey84", {214, 214, 214}},
+ {"grey85", {217, 217, 217}},
+ {"grey86", {219, 219, 219}},
+ {"grey87", {222, 222, 222}},
+ {"grey88", {224, 224, 224}},
+ {"grey89", {227, 227, 227}},
+ {"grey9", {23, 23, 23}},
+ {"grey90", {229, 229, 229}},
+ {"grey91", {232, 232, 232}},
+ {"grey92", {235, 235, 235}},
+ {"grey93", {237, 237, 237}},
+ {"grey94", {240, 240, 240}},
+ {"grey95", {242, 242, 242}},
+ {"grey96", {245, 245, 245}},
+ {"grey97", {247, 247, 247}},
+ {"grey98", {250, 250, 250}},
+ {"grey99", {252, 252, 252}},
+ {"honeydew", {240, 255, 240}},
+ {"honeydew1", {240, 255, 240}},
+ {"honeydew2", {224, 238, 224}},
+ {"honeydew3", {193, 205, 193}},
+ {"honeydew4", {131, 139, 131}},
+ {"hotpink", {255, 105, 180}},
+ {"hotpink1", {255, 110, 180}},
+ {"hotpink2", {238, 106, 167}},
+ {"hotpink3", {205, 96, 144}},
+ {"hotpink4", {139, 58, 98}},
+ {"indianred", {205, 92, 92}},
+ {"indianred1", {255, 106, 106}},
+ {"indianred2", {238, 99, 99}},
+ {"indianred3", {205, 85, 85}},
+ {"indianred4", {139, 58, 58}},
+ {"indigo", {75, 0, 130}},
+ {"ivory", {255, 255, 240}},
+ {"ivory1", {255, 255, 240}},
+ {"ivory2", {238, 238, 224}},
+ {"ivory3", {205, 205, 193}},
+ {"ivory4", {139, 139, 131}},
+ {"khaki", {240, 230, 140}},
+ {"khaki1", {255, 246, 143}},
+ {"khaki2", {238, 230, 133}},
+ {"khaki3", {205, 198, 115}},
+ {"khaki4", {139, 134, 78}},
+ {"lavender", {230, 230, 250}},
+ {"lavenderblush", {255, 240, 245}},
+ {"lavenderblush1", {255, 240, 245}},
+ {"lavenderblush2", {238, 224, 229}},
+ {"lavenderblush3", {205, 193, 197}},
+ {"lavenderblush4", {139, 131, 134}},
+ {"lawngreen", {124, 252, 0}},
+ {"lemonchiffon", {255, 250, 205}},
+ {"lemonchiffon1", {255, 250, 205}},
+ {"lemonchiffon2", {238, 233, 191}},
+ {"lemonchiffon3", {205, 201, 165}},
+ {"lemonchiffon4", {139, 137, 112}},
+ {"lightblue", {173, 216, 230}},
+ {"lightblue1", {191, 239, 255}},
+ {"lightblue2", {178, 223, 238}},
+ {"lightblue3", {154, 192, 205}},
+ {"lightblue4", {104, 131, 139}},
+ {"lightcoral", {240, 128, 128}},
+ {"lightcyan", {224, 255, 255}},
+ {"lightcyan1", {224, 255, 255}},
+ {"lightcyan2", {209, 238, 238}},
+ {"lightcyan3", {180, 205, 205}},
+ {"lightcyan4", {122, 139, 139}},
+ {"lightgoldenrod", {238, 221, 130}},
+ {"lightgoldenrod1", {255, 236, 139}},
+ {"lightgoldenrod2", {238, 220, 130}},
+ {"lightgoldenrod3", {205, 190, 112}},
+ {"lightgoldenrod4", {139, 129, 76}},
+ {"lightgoldenrodyellow", {250, 250, 210}},
+ {"lightgray", {211, 211, 211}},
+ {"lightgreen", {144, 238, 144}},
+ {"lightgrey", {211, 211, 211}},
+ {"lightpink", {255, 182, 193}},
+ {"lightpink1", {255, 174, 185}},
+ {"lightpink2", {238, 162, 173}},
+ {"lightpink3", {205, 140, 149}},
+ {"lightpink4", {139, 95, 101}},
+ {"lightsalmon", {255, 160, 122}},
+ {"lightsalmon1", {255, 160, 122}},
+ {"lightsalmon2", {238, 149, 114}},
+ {"lightsalmon3", {205, 129, 98}},
+ {"lightsalmon4", {139, 87, 66}},
+ {"lightseagreen", {32, 178, 170}},
+ {"lightskyblue", {135, 206, 250}},
+ {"lightskyblue1", {176, 226, 255}},
+ {"lightskyblue2", {164, 211, 238}},
+ {"lightskyblue3", {141, 182, 205}},
+ {"lightskyblue4", {96, 123, 139}},
+ {"lightslateblue", {132, 112, 255}},
+ {"lightslategray", {119, 136, 153}},
+ {"lightslategrey", {119, 136, 153}},
+ {"lightsteelblue", {176, 196, 222}},
+ {"lightsteelblue1", {202, 225, 255}},
+ {"lightsteelblue2", {188, 210, 238}},
+ {"lightsteelblue3", {162, 181, 205}},
+ {"lightsteelblue4", {110, 123, 139}},
+ {"lightyellow", {255, 255, 224}},
+ {"lightyellow1", {255, 255, 224}},
+ {"lightyellow2", {238, 238, 209}},
+ {"lightyellow3", {205, 205, 180}},
+ {"lightyellow4", {139, 139, 122}},
+ {"lime", {0, 255, 0}},
+ {"limegreen", {50, 205, 50}},
+ {"linen", {250, 240, 230}},
+ {"magenta", {255, 0, 255}},
+ {"magenta1", {255, 0, 255}},
+ {"magenta2", {238, 0, 238}},
+ {"magenta3", {205, 0, 205}},
+ {"magenta4", {139, 0, 139}},
+ {"maroon", {176, 48, 96}},
+ {"maroon1", {255, 52, 179}},
+ {"maroon2", {238, 48, 167}},
+ {"maroon3", {205, 41, 144}},
+ {"maroon4", {139, 28, 98}},
+ {"mediumaquamarine", {102, 205, 170}},
+ {"mediumblue", {0, 0, 205}},
+ {"mediumorchid", {186, 85, 211}},
+ {"mediumorchid1", {224, 102, 255}},
+ {"mediumorchid2", {209, 95, 238}},
+ {"mediumorchid3", {180, 82, 205}},
+ {"mediumorchid4", {122, 55, 139}},
+ {"mediumpurple", {147, 112, 219}},
+ {"mediumpurple1", {171, 130, 255}},
+ {"mediumpurple2", {159, 121, 238}},
+ {"mediumpurple3", {137, 104, 205}},
+ {"mediumpurple4", {93, 71, 139}},
+ {"mediumseagreen", {60, 179, 113}},
+ {"mediumslateblue", {123, 104, 238}},
+ {"mediumspringgreen", {0, 250, 154}},
+ {"mediumturquoise", {72, 209, 204}},
+ {"mediumvioletred", {199, 21, 133}},
+ {"midnightblue", {25, 25, 112}},
+ {"mintcream", {245, 255, 250}},
+ {"mistyrose", {255, 228, 225}},
+ {"mistyrose1", {255, 228, 225}},
+ {"mistyrose2", {238, 213, 210}},
+ {"mistyrose3", {205, 183, 181}},
+ {"mistyrose4", {139, 125, 123}},
+ {"moccasin", {255, 228, 181}},
+ {"navajowhite", {255, 222, 173}},
+ {"navajowhite1", {255, 222, 173}},
+ {"navajowhite2", {238, 207, 161}},
+ {"navajowhite3", {205, 179, 139}},
+ {"navajowhite4", {139, 121, 94}},
+ {"navy", {0, 0, 128}},
+ {"navyblue", {0, 0, 128}},
+ {"oldlace", {253, 245, 230}},
+ {"olive", {128, 128, 0}},
+ {"olivedrab", {107, 142, 35}},
+ {"olivedrab1", {192, 255, 62}},
+ {"olivedrab2", {179, 238, 58}},
+ {"olivedrab3", {154, 205, 50}},
+ {"olivedrab4", {105, 139, 34}},
+ {"orange", {255, 165, 0}},
+ {"orange1", {255, 165, 0}},
+ {"orange2", {238, 154, 0}},
+ {"orange3", {205, 133, 0}},
+ {"orange4", {139, 90, 0}},
+ {"orangered", {255, 69, 0}},
+ {"orangered1", {255, 69, 0}},
+ {"orangered2", {238, 64, 0}},
+ {"orangered3", {205, 55, 0}},
+ {"orangered4", {139, 37, 0}},
+ {"orchid", {218, 112, 214}},
+ {"orchid1", {255, 131, 250}},
+ {"orchid2", {238, 122, 233}},
+ {"orchid3", {205, 105, 201}},
+ {"orchid4", {139, 71, 137}},
+ {"palegoldenrod", {238, 232, 170}},
+ {"palegreen", {152, 251, 152}},
+ {"palegreen1", {154, 255, 154}},
+ {"palegreen2", {144, 238, 144}},
+ {"palegreen3", {124, 205, 124}},
+ {"palegreen4", {84, 139, 84}},
+ {"paleturquoise", {175, 238, 238}},
+ {"paleturquoise1", {187, 255, 255}},
+ {"paleturquoise2", {174, 238, 238}},
+ {"paleturquoise3", {150, 205, 205}},
+ {"paleturquoise4", {102, 139, 139}},
+ {"palevioletred", {219, 112, 147}},
+ {"palevioletred1", {255, 130, 171}},
+ {"palevioletred2", {238, 121, 159}},
+ {"palevioletred3", {205, 104, 137}},
+ {"palevioletred4", {139, 71, 93}},
+ {"papayawhip", {255, 239, 213}},
+ {"peachpuff", {255, 218, 185}},
+ {"peachpuff1", {255, 218, 185}},
+ {"peachpuff2", {238, 203, 173}},
+ {"peachpuff3", {205, 175, 149}},
+ {"peachpuff4", {139, 119, 101}},
+ {"peru", {205, 133, 63}},
+ {"pink", {255, 192, 203}},
+ {"pink1", {255, 181, 197}},
+ {"pink2", {238, 169, 184}},
+ {"pink3", {205, 145, 158}},
+ {"pink4", {139, 99, 108}},
+ {"plum", {221, 160, 221}},
+ {"plum1", {255, 187, 255}},
+ {"plum2", {238, 174, 238}},
+ {"plum3", {205, 150, 205}},
+ {"plum4", {139, 102, 139}},
+ {"powderblue", {176, 224, 230}},
+ {"purple", {160, 32, 240}},
+ {"purple1", {155, 48, 255}},
+ {"purple2", {145, 44, 238}},
+ {"purple3", {125, 38, 205}},
+ {"purple4", {85, 26, 139}},
+ {"rebeccapurple", {102, 51, 153}},
+ {"red", {255, 0, 0}},
+ {"red1", {255, 0, 0}},
+ {"red2", {238, 0, 0}},
+ {"red3", {205, 0, 0}},
+ {"red4", {139, 0, 0}},
+ {"rosybrown", {188, 143, 143}},
+ {"rosybrown1", {255, 193, 193}},
+ {"rosybrown2", {238, 180, 180}},
+ {"rosybrown3", {205, 155, 155}},
+ {"rosybrown4", {139, 105, 105}},
+ {"royalblue", {65, 105, 225}},
+ {"royalblue1", {72, 118, 255}},
+ {"royalblue2", {67, 110, 238}},
+ {"royalblue3", {58, 95, 205}},
+ {"royalblue4", {39, 64, 139}},
+ {"saddlebrown", {139, 69, 19}},
+ {"salmon", {250, 128, 114}},
+ {"salmon1", {255, 140, 105}},
+ {"salmon2", {238, 130, 98}},
+ {"salmon3", {205, 112, 84}},
+ {"salmon4", {139, 76, 57}},
+ {"sandybrown", {244, 164, 96}},
+ {"seagreen", {46, 139, 87}},
+ {"seagreen1", {84, 255, 159}},
+ {"seagreen2", {78, 238, 148}},
+ {"seagreen3", {67, 205, 128}},
+ {"seagreen4", {46, 139, 87}},
+ {"seashell", {255, 245, 238}},
+ {"seashell1", {255, 245, 238}},
+ {"seashell2", {238, 229, 222}},
+ {"seashell3", {205, 197, 191}},
+ {"seashell4", {139, 134, 130}},
+ {"sienna", {160, 82, 45}},
+ {"sienna1", {255, 130, 71}},
+ {"sienna2", {238, 121, 66}},
+ {"sienna3", {205, 104, 57}},
+ {"sienna4", {139, 71, 38}},
+ {"silver", {192, 192, 192}},
+ {"skyblue", {135, 206, 235}},
+ {"skyblue1", {135, 206, 255}},
+ {"skyblue2", {126, 192, 238}},
+ {"skyblue3", {108, 166, 205}},
+ {"skyblue4", {74, 112, 139}},
+ {"slateblue", {106, 90, 205}},
+ {"slateblue1", {131, 111, 255}},
+ {"slateblue2", {122, 103, 238}},
+ {"slateblue3", {105, 89, 205}},
+ {"slateblue4", {71, 60, 139}},
+ {"slategray", {112, 128, 144}},
+ {"slategray1", {198, 226, 255}},
+ {"slategray2", {185, 211, 238}},
+ {"slategray3", {159, 182, 205}},
+ {"slategray4", {108, 123, 139}},
+ {"slategrey", {112, 128, 144}},
+ {"snow", {255, 250, 250}},
+ {"snow1", {255, 250, 250}},
+ {"snow2", {238, 233, 233}},
+ {"snow3", {205, 201, 201}},
+ {"snow4", {139, 137, 137}},
+ {"springgreen", {0, 255, 127}},
+ {"springgreen1", {0, 255, 127}},
+ {"springgreen2", {0, 238, 118}},
+ {"springgreen3", {0, 205, 102}},
+ {"springgreen4", {0, 139, 69}},
+ {"steelblue", {70, 130, 180}},
+ {"steelblue1", {99, 184, 255}},
+ {"steelblue2", {92, 172, 238}},
+ {"steelblue3", {79, 148, 205}},
+ {"steelblue4", {54, 100, 139}},
+ {"tan", {210, 180, 140}},
+ {"tan1", {255, 165, 79}},
+ {"tan2", {238, 154, 73}},
+ {"tan3", {205, 133, 63}},
+ {"tan4", {139, 90, 43}},
+ {"teal", {0, 128, 128}},
+ {"thistle", {216, 191, 216}},
+ {"thistle1", {255, 225, 255}},
+ {"thistle2", {238, 210, 238}},
+ {"thistle3", {205, 181, 205}},
+ {"thistle4", {139, 123, 139}},
+ {"tomato", {255, 99, 71}},
+ {"tomato1", {255, 99, 71}},
+ {"tomato2", {238, 92, 66}},
+ {"tomato3", {205, 79, 57}},
+ {"tomato4", {139, 54, 38}},
+ {"turquoise", {64, 224, 208}},
+ {"turquoise1", {0, 245, 255}},
+ {"turquoise2", {0, 229, 238}},
+ {"turquoise3", {0, 197, 205}},
+ {"turquoise4", {0, 134, 139}},
+ {"violet", {238, 130, 238}},
+ {"violetred", {208, 32, 144}},
+ {"violetred1", {255, 62, 150}},
+ {"violetred2", {238, 58, 140}},
+ {"violetred3", {205, 50, 120}},
+ {"violetred4", {139, 34, 82}},
+ {"webgray", {128, 128, 128}},
+ {"webgreen", {0, 128, 0}},
+ {"webgrey", {128, 128, 128}},
+ {"webmaroon", {128, 0, 0}},
+ {"webpurple", {128, 0, 128}},
+ {"wheat", {245, 222, 179}},
+ {"wheat1", {255, 231, 186}},
+ {"wheat2", {238, 216, 174}},
+ {"wheat3", {205, 186, 150}},
+ {"wheat4", {139, 126, 102}},
+ {"white", {255, 255, 255}},
+ {"whitesmoke", {245, 245, 245}},
+ {"x11gray", {190, 190, 190}},
+ {"x11green", {0, 255, 0}},
+ {"x11grey", {190, 190, 190}},
+ {"x11maroon", {176, 48, 96}},
+ {"x11purple", {160, 32, 240}},
+ {"yellow", {255, 255, 0}},
+ {"yellow1", {255, 255, 0}},
+ {"yellow2", {238, 238, 0}},
+ {"yellow3", {205, 205, 0}},
+ {"yellow4", {139, 139, 0}},
+ {"yellowgreen", {154, 205, 50}},
+ {"activeborder", {180, 180, 180}},
+ {"activecaption", {153, 180, 209}},
+ {"appworkspace", {171, 171, 171}},
+ {"background", {0, 0, 0}},
+ {"buttonhighlight", {255, 255, 255}},
+ {"buttonshadow", {160, 160, 160}},
+ {"captiontext", {0, 0, 0}},
+ {"inactiveborder", {244, 247, 252}},
+ {"inactivecaption", {191, 205, 219}},
+ {"inactivecaptiontext", {0, 0, 0}},
+ {"infobackground", {255, 255, 225}},
+ {"infotext", {0, 0, 0}},
+ {"menu", {240, 240, 240}},
+ {"menutext", {0, 0, 0}},
+ {"scrollbar", {200, 200, 200}},
+ {"threeddarkshadow", {0, 0, 0}},
+ {"threedface", {0, 0, 0}},
+ {"threedhighlight", {0, 0, 0}},
+ {"threedlightshadow", {0, 0, 0}},
+ {"threedshadow", {0, 0, 0}},
+ {"transparent", {0, 0, 0, 0}},
+ {"window", {255, 255, 255}},
+ {"windowframe", {100, 100, 100}},
+ {"windowtext", {0, 0, 0}},
+};
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_COLORS_LIST_HXX
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
new file mode 100644
index 0000000..aed035a
--- /dev/null
+++ b/src/libserver/css/css_parser.cxx
@@ -0,0 +1,892 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_parser.hxx"
+#include "css_tokeniser.hxx"
+#include "css_selector.hxx"
+#include "css_rule.hxx"
+#include "css_util.hxx"
+#include "css.hxx"
+#include "fmt/core.h"
+
+#include <vector>
+#include <unicode/utf8.h>
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+const css_consumed_block css_parser_eof_block{};
+
+auto css_consumed_block::attach_block(consumed_block_ptr &&block) -> bool
+{
+ if (std::holds_alternative<std::monostate>(content)) {
+ /* Switch from monostate */
+ content = std::vector<consumed_block_ptr>();
+ }
+ else if (!std::holds_alternative<std::vector<consumed_block_ptr>>(content)) {
+ /* A single component, cannot attach a block ! */
+ return false;
+ }
+
+ auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content);
+ value_vec.push_back(std::move(block));
+
+ return true;
+}
+
+auto css_consumed_block::add_function_argument(consumed_block_ptr &&block) -> bool
+{
+ if (!std::holds_alternative<css_function_block>(content)) {
+ return false;
+ }
+
+ auto &&func_bloc = std::get<css_function_block>(content);
+ func_bloc.args.push_back(std::move(block));
+
+ return true;
+}
+
+auto css_consumed_block::token_type_str(void) const -> const char *
+{
+ const auto *ret = "";
+
+ switch (tag) {
+ case parser_tag_type::css_top_block:
+ ret = "top";
+ break;
+ case parser_tag_type::css_qualified_rule:
+ ret = "qualified rule";
+ break;
+ case parser_tag_type::css_at_rule:
+ ret = "at rule";
+ break;
+ case parser_tag_type::css_simple_block:
+ ret = "simple block";
+ break;
+ case parser_tag_type::css_function:
+ ret = "function";
+ break;
+ case parser_tag_type::css_function_arg:
+ ret = "function arg";
+ break;
+ case parser_tag_type::css_component:
+ ret = "component";
+ break;
+ case parser_tag_type::css_eof_block:
+ ret = "eof";
+ break;
+ }
+
+ return ret;
+}
+
+auto css_consumed_block::debug_str(void) -> std::string
+{
+ std::string ret = fmt::format(R"("type": "{}", "value": )", token_type_str());
+
+ std::visit([&](auto &arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
+ /* Array of blocks */
+ ret += "[";
+ for (const auto &block: arg) {
+ ret += "{";
+ ret += block->debug_str();
+ ret += "}, ";
+ }
+
+ if (*(--ret.end()) == ' ') {
+ ret.pop_back();
+ ret.pop_back(); /* Last ',' */
+ }
+ ret += "]";
+ }
+ else if constexpr (std::is_same_v<T, std::monostate>) {
+ /* Empty block */
+ ret += R"("empty")";
+ }
+ else if constexpr (std::is_same_v<T, css_function_block>) {
+ ret += R"({ "content": {"token": )";
+ ret += "\"" + arg.function.debug_token_str() + "\", ";
+ ret += R"("arguments": [)";
+
+ for (const auto &block: arg.args) {
+ ret += "{";
+ ret += block->debug_str();
+ ret += "}, ";
+ }
+ if (*(--ret.end()) == ' ') {
+ ret.pop_back();
+ ret.pop_back(); /* Last ',' */
+ }
+ ret += "]}}";
+ }
+ else {
+ /* Single element block */
+ ret += "\"" + arg.debug_token_str() + "\"";
+ }
+ },
+ content);
+
+ return ret;
+}
+
+class css_parser {
+public:
+ css_parser(void) = delete; /* Require mempool to be set for logging */
+ explicit css_parser(rspamd_mempool_t *pool)
+ : pool(pool)
+ {
+ style_object.reset();
+ error.type = css_parse_error_type::PARSE_ERROR_NO_ERROR;
+ }
+
+ /*
+ * This constructor captures existing via unique_ptr, but it does not
+ * destruct it on errors (we assume that it is owned somewhere else)
+ */
+ explicit css_parser(std::shared_ptr<css_style_sheet> &&existing, rspamd_mempool_t *pool)
+ : style_object(existing), pool(pool)
+ {
+ error.type = css_parse_error_type::PARSE_ERROR_NO_ERROR;
+ }
+
+ /*
+ * Process input css blocks
+ */
+ std::unique_ptr<css_consumed_block> consume_css_blocks(const std::string_view &sv);
+ /*
+ * Process a single css rule
+ */
+ std::unique_ptr<css_consumed_block> consume_css_rule(const std::string_view &sv);
+ std::optional<css_parse_error> consume_input(const std::string_view &sv);
+
+ auto get_object_maybe(void) -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>
+ {
+ if (style_object) {
+ return style_object;
+ }
+
+ return tl::make_unexpected(error);
+ }
+
+ /* Helper parser methods */
+ static bool need_unescape(const std::string_view &sv);
+
+private:
+ std::shared_ptr<css_style_sheet> style_object;
+ std::unique_ptr<css_tokeniser> tokeniser;
+
+ css_parse_error error;
+ rspamd_mempool_t *pool;
+
+ int rec_level = 0;
+ const int max_rec = 20;
+ bool eof = false;
+
+ /* Consumers */
+ auto component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+ css_parser_token::token_type expected_end,
+ bool consume_current) -> bool;
+ auto qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto at_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+};
+
+/*
+ * Find if we need to unescape css
+ */
+bool css_parser::need_unescape(const std::string_view &sv)
+{
+ bool in_quote = false;
+ char quote_char, prev_c = 0;
+
+ for (const auto c: sv) {
+ if (!in_quote) {
+ if (c == '"' || c == '\'') {
+ in_quote = true;
+ quote_char = c;
+ }
+ else if (c == '\\') {
+ return true;
+ }
+ }
+ else {
+ if (c == quote_char) {
+ if (prev_c != '\\') {
+ in_quote = false;
+ }
+ }
+ prev_c = c;
+ }
+ }
+
+ return false;
+}
+
+auto css_parser::function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ auto ret = true, want_more = true;
+
+ msg_debug_css("consume function block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing function value");
+ return false;
+ }
+
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::ebrace_token:
+ ret = true;
+ want_more = false;
+ break;
+ case css_parser_token::token_type::comma_token:
+ case css_parser_token::token_type::delim_token:
+ case css_parser_token::token_type::obrace_token:
+ break;
+ default:
+ /* Attach everything to the function block */
+ top->add_function_argument(std::make_unique<css_consumed_block>(
+ css::css_consumed_block::parser_tag_type::css_function_arg,
+ std::move(next_token)));
+ break;
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+ css_parser_token::token_type expected_end,
+ bool consume_current) -> bool
+{
+ auto ret = true;
+ std::unique_ptr<css_consumed_block> block;
+
+ msg_debug_css("consume simple block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (!consume_current && ++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing simple block value");
+ return false;
+ }
+
+ if (!consume_current) {
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ }
+
+
+ while (ret && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ if (next_token.type == expected_end) {
+ break;
+ }
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(consume_current ? top : block);
+ break;
+ }
+ }
+
+ if (!consume_current && ret) {
+ msg_debug_css("attached node 'simple block' rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+
+ if (!consume_current) {
+ --rec_level;
+ }
+
+ return ret;
+}
+
+auto css_parser::qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ msg_debug_css("consume qualified block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing qualified rule value");
+ return false;
+ }
+
+ auto ret = true, want_more = true;
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_qualified_rule);
+
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::cdo_token:
+ case css_parser_token::token_type::cdc_token:
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ /* Ignore */
+ ret = true;
+ }
+ else {
+ }
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token, false);
+ want_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(block);
+ break;
+ };
+ }
+
+ if (ret) {
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ msg_debug_css("attached node qualified rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::at_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ msg_debug_css("consume at-rule block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing at keyword");
+ return false;
+ }
+
+ auto ret = true, want_more = true;
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_at_rule);
+
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::cdo_token:
+ case css_parser_token::token_type::cdc_token:
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ /* Ignore */
+ ret = true;
+ }
+ else {
+ }
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token, false);
+ want_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::semicolon_token:
+ want_more = false;
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(block);
+ break;
+ };
+ }
+
+ if (ret) {
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ msg_debug_css("attached node qualified rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ auto ret = true, need_more = true;
+ std::unique_ptr<css_consumed_block> block;
+
+ msg_debug_css("consume component block; top block: %s, recursion level %d",
+ top->token_type_str(), rec_level);
+
+ if (++rec_level > max_rec) {
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING,
+ "maximum nesting has reached when parsing component value");
+ return false;
+ }
+
+ while (ret && need_more && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token,
+ true);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::obrace_token:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ebrace_token,
+ true);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::osqbrace_token:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::esqbrace_token,
+ true);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::function_token: {
+ need_more = false;
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_function,
+ std::move(next_token));
+
+ /* Consume the rest */
+ ret = function_consumer(block);
+ break;
+ }
+ default:
+ block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_component,
+ std::move(next_token));
+ need_more = false;
+ break;
+ }
+ }
+
+ if (ret && block) {
+ msg_debug_css("attached node component rule %s; length=%d",
+ block->token_type_str(), (int) block->size());
+ top->attach_block(std::move(block));
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::consume_css_blocks(const std::string_view &sv) -> std::unique_ptr<css_consumed_block>
+{
+ tokeniser = std::make_unique<css_tokeniser>(pool, sv);
+ auto ret = true;
+
+ auto consumed_blocks =
+ std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_top_block);
+
+ while (!eof && ret) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::at_keyword_token:
+ tokeniser->pushback_token(next_token);
+ ret = at_rule_consumer(consumed_blocks);
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = qualified_rule_consumer(consumed_blocks);
+ break;
+ }
+ }
+
+ tokeniser.reset(nullptr); /* No longer needed */
+
+ return consumed_blocks;
+}
+
+auto css_parser::consume_css_rule(const std::string_view &sv) -> std::unique_ptr<css_consumed_block>
+{
+ tokeniser = std::make_unique<css_tokeniser>(pool, sv);
+ auto ret = true;
+
+ auto rule_block =
+ std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_simple_block);
+
+ while (!eof && ret) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(next_token);
+ ret = component_value_consumer(rule_block);
+ break;
+ }
+ }
+
+ tokeniser.reset(nullptr); /* No longer needed */
+
+ return rule_block;
+}
+
+std::optional<css_parse_error>
+css_parser::consume_input(const std::string_view &sv)
+{
+ auto &&consumed_blocks = consume_css_blocks(sv);
+ const auto &rules = consumed_blocks->get_blocks_or_empty();
+
+ if (rules.empty()) {
+ if (error.type == css_parse_error_type::PARSE_ERROR_NO_ERROR) {
+ return css_parse_error(css_parse_error_type::PARSE_ERROR_EMPTY,
+ "no css rules consumed");
+ }
+ else {
+ return error;
+ }
+ }
+
+ if (!style_object) {
+ style_object = std::make_shared<css_style_sheet>(pool);
+ }
+
+ for (auto &&rule: rules) {
+ /*
+ * For now, we do not need any of the at rules, so we can safely ignore them
+ */
+ auto &&children = rule->get_blocks_or_empty();
+
+ if (children.size() > 1 &&
+ children[0]->tag == css_consumed_block::parser_tag_type::css_component) {
+ auto simple_block = std::find_if(children.begin(), children.end(),
+ [](auto &bl) {
+ return bl->tag == css_consumed_block::parser_tag_type::css_simple_block;
+ });
+
+ if (simple_block != children.end()) {
+ /*
+ * We have a component and a simple block,
+ * so we can parse a selector and then extract
+ * declarations from a simple block
+ */
+
+ /* First, tag all components as preamble */
+ auto selector_it = children.cbegin();
+
+ auto selector_token_functor = [&selector_it, &simple_block](void)
+ -> const css_consumed_block & {
+ for (;;) {
+ if (selector_it == simple_block) {
+ return css_parser_eof_block;
+ }
+
+ const auto &ret = (*selector_it);
+
+ ++selector_it;
+
+ return *ret;
+ }
+ };
+
+ auto selectors_vec = process_selector_tokens(pool, selector_token_functor);
+
+ if (selectors_vec.size() > 0) {
+ msg_debug_css("processed %d selectors", (int) selectors_vec.size());
+ auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin();
+ auto decls_end = (*simple_block)->get_blocks_or_empty().cend();
+ auto declaration_token_functor = [&decls_it, &decls_end](void)
+ -> const css_consumed_block & {
+ for (;;) {
+ if (decls_it == decls_end) {
+ return css_parser_eof_block;
+ }
+
+ const auto &ret = (*decls_it);
+
+ ++decls_it;
+
+ return *ret;
+ }
+ };
+
+ auto declarations_vec = process_declaration_tokens(pool,
+ declaration_token_functor);
+
+ if (declarations_vec && !declarations_vec->get_rules().empty()) {
+ msg_debug_css("processed %d rules",
+ (int) declarations_vec->get_rules().size());
+
+ for (auto &&selector: selectors_vec) {
+ style_object->add_selector_rule(std::move(selector),
+ declarations_vec);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ auto debug_str = consumed_blocks->debug_str();
+ msg_debug_css("consumed css: {%*s}", (int) debug_str.size(), debug_str.data());
+
+ return std::nullopt;
+}
+
+auto get_selectors_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor
+{
+ css_parser parser(pool);
+
+ auto &&consumed_blocks = parser.consume_css_blocks(st);
+ const auto &rules = consumed_blocks->get_blocks_or_empty();
+
+ auto rules_it = rules.begin();
+ auto &&children = (*rules_it)->get_blocks_or_empty();
+ auto cur = children.begin();
+ auto last = children.end();
+
+ /*
+ * We use move only wrapper to state the fact that the cosumed blocks
+ * are moved into the closure, not copied.
+ * It prevents us from thinking about copies of the blocks and
+ * functors.
+ * Mutable lambda is required to copy iterators inside of the closure,
+ * as, again, it is C++ where lifetime of the objects must be explicitly
+ * transferred. On the other hand, we could move all stuff inside and remove
+ * mutable.
+ */
+ return [cur, consumed_blocks = std::move(consumed_blocks), last](void) mutable
+ -> const css_consumed_block & {
+ if (cur != last) {
+ const auto &ret = (*cur);
+
+ ++cur;
+
+ return *ret;
+ }
+
+ return css_parser_eof_block;
+ };
+}
+
+auto get_rules_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor
+{
+ css_parser parser(pool);
+
+ auto &&consumed_blocks = parser.consume_css_rule(st);
+ const auto &rules = consumed_blocks->get_blocks_or_empty();
+
+ auto cur = rules.begin();
+ auto last = rules.end();
+
+ return [cur, consumed_blocks = std::move(consumed_blocks), last](void) mutable
+ -> const css_consumed_block & {
+ if (cur != last) {
+ const auto &ret = (*cur);
+
+ ++cur;
+
+ return *ret;
+ }
+
+ return css_parser_eof_block;
+ };
+}
+
+
+/*
+ * Wrapper for the parser
+ */
+auto parse_css(rspamd_mempool_t *pool, const std::string_view &st,
+ std::shared_ptr<css_style_sheet> &&other)
+ -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>
+{
+ css_parser parser(std::forward<std::shared_ptr<css_style_sheet>>(other), pool);
+ std::string_view processed_input;
+
+ if (css_parser::need_unescape(st)) {
+ processed_input = rspamd::css::unescape_css(pool, st);
+ }
+ else {
+ /* Lowercase inplace */
+ auto *nspace = rspamd_mempool_alloc_buffer(pool, st.size());
+ rspamd_str_copy_lc(st.data(), nspace, st.size());
+ processed_input = std::string_view{nspace, st.size()};
+ }
+
+ auto maybe_error = parser.consume_input(processed_input);
+ if (!maybe_error) {
+ return parser.get_object_maybe();
+ }
+
+ return tl::make_unexpected(maybe_error.value());
+}
+
+auto parse_css_declaration(rspamd_mempool_t *pool, const std::string_view &st)
+ -> rspamd::html::html_block *
+{
+ std::string_view processed_input;
+
+ if (css_parser::need_unescape(st)) {
+ processed_input = rspamd::css::unescape_css(pool, st);
+ }
+ else {
+ auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, st.size()));
+ auto nlen = rspamd_str_copy_lc(st.data(), nspace, st.size());
+ processed_input = std::string_view{nspace, nlen};
+ }
+ auto &&res = process_declaration_tokens(pool,
+ get_rules_parser_functor(pool, processed_input));
+
+ if (res) {
+ return res->compile_to_block(pool);
+ }
+
+ return nullptr;
+}
+
+TEST_SUITE("css")
+{
+ TEST_CASE("parse colors")
+ {
+ const std::vector<const char *> cases{
+ "P { CoLoR: rgb(100%, 50%, 0%); opacity: -1; width: 1em; display: none; } /* very transparent solid orange тест */",
+ "p { color: rgb(100%, 50%, 0%); opacity: 2; display: inline; } /* very transparent solid orange */",
+ "p { color: rgb(100%, 50%, 0%); opacity: 0.5; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 1; width: 99%; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 10%; width: 99%; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 10%; width: 100px; } /* very transparent solid orange */\n",
+ "p { color: rgb(100%, 50%, 0%); opacity: 10% } /* very transparent solid orange */\n",
+ "* { color: hsl(0, 100%, 50%) !important } /* red */\n",
+ "* { color: hsl(120, 100%, 50%) important } /* lime */\n",
+ "* { color: hsl(120, 100%, 25%) } /* dark green */\n",
+ "* { color: hsl(120, 100%, 75%) } /* light green */\n",
+ "* { color: hsl(120, 75%, 75%) } /* pastel green, and so on */\n",
+ "em { color: #f00 } /* #rgb */\n",
+ "em { color: #ff0000 } /* #rrggbb */\n",
+ "em { color: rgb(255,0,0) }\n",
+ "em { color: rgb(100%, 0%, 0%) }\n",
+ "body {color: black; background: white }\n",
+ "h1 { color: maroon }\n",
+ "h2 { color: olive }\n",
+ "em { color: rgb(255,0,0) } /* integer range 0 - 255 */\n",
+ "em { color: rgb(300,0,0) } /* clipped to rgb(255,0,0) */\n",
+ "em { color: rgb(255,-10,0) } /* clipped to rgb(255,0,0) */\n",
+ "em { color: rgb(110%, 0%, 0%) } /* clipped to rgb(100%,0%,0%) */\n",
+ "em { color: rgb(255,0,0) } /* integer range 0 - 255 */\n",
+ "em { color: rgba(255,0,0,1) /* the same, with explicit opacity of 1 */\n",
+ "em { color: rgb(100%,0%,0%) } /* float range 0.0% - 100.0% */\n",
+ "em { color: rgba(100%,0%,0%,1) } /* the same, with explicit opacity of 1 */\n",
+ "p { color: rgba(0,0,255,0.5) } /* semi-transparent solid blue */\n",
+ "p { color: rgba(100%, 50%, 0%, 0.1) } /* very transparent solid orange */",
+ ".chat-icon[_ng-cnj-c0]::before{content:url(group-2.63e87cd21fbf8c966dd.svg);width:60px;height:60px;display:block}",
+ "tt{color:#1e3482}",
+ "tt{unicode-range: u+0049-u+004a,u+0020;}",
+ "@import url(https://fonts.googleapis.com/css?family=arial:300,400,7000;",
+ "tt{color:black;\v}",
+ "tt{color:black;\f}",
+ };
+
+ rspamd_mempool_t *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "css", 0);
+ for (const auto &c: cases) {
+ SUBCASE((std::string("parse css: ") + c).c_str())
+ {
+ CHECK(parse_css(pool, c, nullptr).value().get() != nullptr);
+ }
+ }
+
+ /* We now merge all styles together */
+ SUBCASE("merged css parse")
+ {
+ std::shared_ptr<css_style_sheet> merged;
+ for (const auto &c: cases) {
+ auto ret = parse_css(pool, c, std::move(merged));
+ merged.swap(ret.value());
+ }
+
+ CHECK(merged.get() != nullptr);
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+}
+}// namespace rspamd::css
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
new file mode 100644
index 0000000..d5a9671
--- /dev/null
+++ b/src/libserver/css/css_parser.hxx
@@ -0,0 +1,244 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_PARSER_HXX
+#define RSPAMD_CSS_PARSER_HXX
+
+#include <variant>
+#include <vector>
+#include <memory>
+#include <string>
+
+#include "function2/function2.hpp"
+#include "css_tokeniser.hxx"
+#include "parse_error.hxx"
+#include "contrib/expected/expected.hpp"
+#include "logger.h"
+
+/* Forward declaration */
+namespace rspamd::html {
+struct html_block;
+}
+
+namespace rspamd::css {
+
+/*
+ * Represents a consumed token by a parser
+ */
+class css_consumed_block {
+public:
+ enum class parser_tag_type : std::uint8_t {
+ css_top_block = 0,
+ css_qualified_rule,
+ css_at_rule,
+ css_simple_block,
+ css_function,
+ css_function_arg,
+ css_component,
+ css_eof_block,
+ };
+ using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
+
+ struct css_function_block {
+ css_parser_token function;
+ std::vector<consumed_block_ptr> args;
+
+ css_function_block(css_parser_token &&tok)
+ : function(std::forward<css_parser_token>(tok))
+ {
+ }
+
+ auto as_string() const -> std::string_view
+ {
+ return function.get_string_or_default("");
+ }
+
+ static auto empty_function() -> const css_function_block &
+ {
+ static const css_function_block invalid(
+ css_parser_token(css_parser_token::token_type::eof_token,
+ css_parser_token_placeholder()));
+ return invalid;
+ }
+ };
+
+ css_consumed_block()
+ : tag(parser_tag_type::css_eof_block)
+ {
+ }
+ css_consumed_block(parser_tag_type tag)
+ : tag(tag)
+ {
+ if (tag == parser_tag_type::css_top_block ||
+ tag == parser_tag_type::css_qualified_rule ||
+ tag == parser_tag_type::css_simple_block) {
+ /* Pre-allocate content for known vector blocks */
+ std::vector<consumed_block_ptr> vec;
+ vec.reserve(4);
+ content = std::move(vec);
+ }
+ }
+ /* Construct a block from a single lexer token (for trivial blocks) */
+ explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok)
+ : tag(tag)
+ {
+ if (tag == parser_tag_type::css_function) {
+ content = css_function_block{std::move(tok)};
+ }
+ else {
+ content = std::move(tok);
+ }
+ }
+
+ /* Attach a new block to the compound block, consuming block inside */
+ auto attach_block(consumed_block_ptr &&block) -> bool;
+ /* Attach a new argument to the compound function block, consuming block inside */
+ auto add_function_argument(consumed_block_ptr &&block) -> bool;
+
+ auto assign_token(css_parser_token &&tok) -> void
+ {
+ content = std::move(tok);
+ }
+
+ /* Empty blocks used to avoid type checks in loops */
+ const inline static std::vector<consumed_block_ptr> empty_block_vec{};
+
+ auto is_blocks_vec() const -> bool
+ {
+ return (std::holds_alternative<std::vector<consumed_block_ptr>>(content));
+ }
+
+ auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr> &
+ {
+ if (is_blocks_vec()) {
+ return std::get<std::vector<consumed_block_ptr>>(content);
+ }
+
+ return empty_block_vec;
+ }
+
+ auto is_token() const -> bool
+ {
+ return (std::holds_alternative<css_parser_token>(content));
+ }
+
+ auto get_token_or_empty() const -> const css_parser_token &
+ {
+ if (is_token()) {
+ return std::get<css_parser_token>(content);
+ }
+
+ return css_parser_eof_token();
+ }
+
+ auto is_function() const -> bool
+ {
+ return (std::holds_alternative<css_function_block>(content));
+ }
+
+ auto get_function_or_invalid() const -> const css_function_block &
+ {
+ if (is_function()) {
+ return std::get<css_function_block>(content);
+ }
+
+ return css_function_block::empty_function();
+ }
+
+ auto size() const -> std::size_t
+ {
+ auto ret = 0;
+
+ std::visit([&](auto &arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
+ /* Array of blocks */
+ ret = arg.size();
+ }
+ else if constexpr (std::is_same_v<T, std::monostate>) {
+ /* Empty block */
+ ret = 0;
+ }
+ else {
+ /* Single element block */
+ ret = 1;
+ }
+ },
+ content);
+
+ return ret;
+ }
+
+ auto is_eof() -> bool
+ {
+ return tag == parser_tag_type::css_eof_block;
+ }
+
+ /* Debug methods */
+ auto token_type_str(void) const -> const char *;
+ auto debug_str(void) -> std::string;
+
+public:
+ parser_tag_type tag;
+
+private:
+ std::variant<std::monostate,
+ std::vector<consumed_block_ptr>,
+ css_parser_token,
+ css_function_block>
+ content;
+};
+
+extern const css_consumed_block css_parser_eof_block;
+
+using blocks_gen_functor = fu2::unique_function<const css_consumed_block &(void)>;
+
+class css_style_sheet;
+/*
+ * Update the existing stylesheet with another stylesheet
+ */
+auto parse_css(rspamd_mempool_t *pool, const std::string_view &st,
+ std::shared_ptr<css_style_sheet> &&other)
+ -> tl::expected<std::shared_ptr<css_style_sheet>, css_parse_error>;
+
+/*
+ * Creates a functor to consume css selectors sequence
+ */
+auto get_selectors_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor;
+
+/*
+ * Creates a functor to process a rule definition (e.g. from embedded style tag for
+ * an element)
+ */
+auto get_rules_parser_functor(rspamd_mempool_t *pool,
+ const std::string_view &st) -> blocks_gen_functor;
+
+/**
+ * Parses a css declaration (e.g. embedded css and returns a completed html block)
+ * @param pool
+ * @param st
+ * @return
+ */
+auto parse_css_declaration(rspamd_mempool_t *pool, const std::string_view &st)
+ -> rspamd::html::html_block *;
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_PARSER_HXX
diff --git a/src/libserver/css/css_property.cxx b/src/libserver/css/css_property.cxx
new file mode 100644
index 0000000..1557109
--- /dev/null
+++ b/src/libserver/css/css_property.cxx
@@ -0,0 +1,69 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_property.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include "libutil/cxx/util.hxx"
+
+namespace rspamd::css {
+
+constexpr const auto prop_names_map = frozen::make_unordered_map<frozen::string, css_property_type>({
+ {"font", css_property_type::PROPERTY_FONT},
+ {"font-color", css_property_type::PROPERTY_FONT_COLOR},
+ {"font-size", css_property_type::PROPERTY_FONT_SIZE},
+ {"color", css_property_type::PROPERTY_COLOR},
+ {"bgcolor", css_property_type::PROPERTY_BGCOLOR},
+ {"background-color", css_property_type::PROPERTY_BGCOLOR},
+ {"background", css_property_type::PROPERTY_BACKGROUND},
+ {"height", css_property_type::PROPERTY_HEIGHT},
+ {"width", css_property_type::PROPERTY_WIDTH},
+ {"display", css_property_type::PROPERTY_DISPLAY},
+ {"visibility", css_property_type::PROPERTY_VISIBILITY},
+ {"opacity", css_property_type::PROPERTY_OPACITY},
+});
+
+/* Ensure that we have all cases listed */
+static_assert(prop_names_map.size() >= static_cast<int>(css_property_type::PROPERTY_NYI));
+
+auto token_string_to_property(const std::string_view &inp)
+ -> css_property_type
+{
+
+ css_property_type ret = css_property_type::PROPERTY_NYI;
+
+ auto known_type = find_map(prop_names_map, inp);
+
+ if (known_type) {
+ ret = known_type.value().get();
+ }
+
+ return ret;
+}
+
+auto css_property::from_token(const css_parser_token &tok)
+ -> tl::expected<css_property, css_parse_error>
+{
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ auto sv = tok.get_string_or_default("");
+
+ return css_property{token_string_to_property(sv), css_property_flag::FLAG_NORMAL};
+ }
+
+ return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
+}
+
+}// namespace rspamd::css
diff --git a/src/libserver/css/css_property.hxx b/src/libserver/css/css_property.hxx
new file mode 100644
index 0000000..9661222
--- /dev/null
+++ b/src/libserver/css/css_property.hxx
@@ -0,0 +1,172 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef RSPAMD_CSS_PROPERTY_HXX
+#define RSPAMD_CSS_PROPERTY_HXX
+
+#include <string>
+#include "css_tokeniser.hxx"
+#include "parse_error.hxx"
+#include "contrib/expected/expected.hpp"
+
+namespace rspamd::css {
+
+/*
+ * To be extended with properties that are interesting from the email
+ * point of view
+ */
+enum class css_property_type : std::uint16_t {
+ PROPERTY_FONT = 0,
+ PROPERTY_FONT_COLOR,
+ PROPERTY_FONT_SIZE,
+ PROPERTY_COLOR,
+ PROPERTY_BGCOLOR,
+ PROPERTY_BACKGROUND,
+ PROPERTY_HEIGHT,
+ PROPERTY_WIDTH,
+ PROPERTY_DISPLAY,
+ PROPERTY_VISIBILITY,
+ PROPERTY_OPACITY,
+ PROPERTY_NYI,
+};
+
+enum class css_property_flag : std::uint16_t {
+ FLAG_NORMAL,
+ FLAG_IMPORTANT,
+ FLAG_NOT_IMPORTANT
+};
+
+struct alignas(int) css_property {
+ css_property_type type;
+ css_property_flag flag;
+
+ css_property(css_property_type t, css_property_flag fl = css_property_flag::FLAG_NORMAL)
+ : type(t), flag(fl)
+ {
+ }
+ static tl::expected<css_property, css_parse_error> from_token(
+ const css_parser_token &tok);
+
+ constexpr auto to_string(void) const -> const char *
+ {
+ const char *ret = "nyi";
+
+ switch (type) {
+ case css_property_type::PROPERTY_FONT:
+ ret = "font";
+ break;
+ case css_property_type::PROPERTY_FONT_COLOR:
+ ret = "font-color";
+ break;
+ case css_property_type::PROPERTY_FONT_SIZE:
+ ret = "font-size";
+ break;
+ case css_property_type::PROPERTY_COLOR:
+ ret = "color";
+ break;
+ case css_property_type::PROPERTY_BGCOLOR:
+ ret = "bgcolor";
+ break;
+ case css_property_type::PROPERTY_BACKGROUND:
+ ret = "background";
+ break;
+ case css_property_type::PROPERTY_HEIGHT:
+ ret = "height";
+ break;
+ case css_property_type::PROPERTY_WIDTH:
+ ret = "width";
+ break;
+ case css_property_type::PROPERTY_DISPLAY:
+ ret = "display";
+ break;
+ case css_property_type::PROPERTY_VISIBILITY:
+ ret = "visibility";
+ break;
+ case css_property_type::PROPERTY_OPACITY:
+ ret = "opacity";
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+ }
+
+ /* Helpers to define which values are valid for which properties */
+ auto is_color(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_COLOR ||
+ type == css_property_type::PROPERTY_BACKGROUND ||
+ type == css_property_type::PROPERTY_BGCOLOR ||
+ type == css_property_type::PROPERTY_FONT_COLOR ||
+ type == css_property_type::PROPERTY_FONT;
+ }
+ auto is_dimension(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_HEIGHT ||
+ type == css_property_type::PROPERTY_WIDTH ||
+ type == css_property_type::PROPERTY_FONT_SIZE ||
+ type == css_property_type::PROPERTY_FONT;
+ }
+
+ auto is_normal_number(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_OPACITY;
+ }
+
+ auto is_display(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_DISPLAY;
+ }
+
+ auto is_visibility(void) const -> bool
+ {
+ return type == css_property_type::PROPERTY_VISIBILITY;
+ }
+
+ auto operator==(const css_property &other) const
+ {
+ return type == other.type;
+ }
+};
+
+
+}// namespace rspamd::css
+
+/* Make properties hashable */
+namespace std {
+template<>
+class hash<rspamd::css::css_property> {
+public:
+ using is_avalanching = void;
+ /* Mix bits to provide slightly better distribution but being constexpr */
+ constexpr size_t operator()(const rspamd::css::css_property &prop) const
+ {
+ std::size_t key = 0xdeadbeef ^ static_cast<std::size_t>(prop.type);
+ key = (~key) + (key << 21);
+ key = key ^ (key >> 24);
+ key = (key + (key << 3)) + (key << 8);
+ key = key ^ (key >> 14);
+ key = (key + (key << 2)) + (key << 4);
+ key = key ^ (key >> 28);
+ key = key + (key << 31);
+ return key;
+ }
+};
+}// namespace std
+
+#endif//RSPAMD_CSS_PROPERTY_HXX \ No newline at end of file
diff --git a/src/libserver/css/css_rule.cxx b/src/libserver/css/css_rule.cxx
new file mode 100644
index 0000000..4e33ac7
--- /dev/null
+++ b/src/libserver/css/css_rule.cxx
@@ -0,0 +1,531 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_rule.hxx"
+#include "css.hxx"
+#include "libserver/html/html_block.hxx"
+#include <limits>
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+/* Class methods */
+void css_rule::override_values(const css_rule &other)
+{
+ int bits = 0;
+ /* Ensure that our bitset is large enough */
+ static_assert(1 << std::variant_size_v<decltype(css_value::value)> <
+ std::numeric_limits<int>::max());
+
+ for (const auto &v: values) {
+ bits |= static_cast<int>(1 << v.value.index());
+ }
+
+ for (const auto &ov: other.values) {
+ if (isset(&bits, static_cast<int>(1 << ov.value.index()))) {
+ /* We need to override the existing value */
+ /*
+ * The algorithm is not very efficient,
+ * so we need to sort the values first and have a O(N) algorithm
+ * On the other hand, values vectors are usually limited to the
+ * number of elements about less then 10, so this O(N^2) algorithm
+ * is probably ok here
+ */
+ for (auto &v: values) {
+ if (v.value.index() == ov.value.index()) {
+ v = ov;
+ }
+ }
+ }
+ }
+
+ /* Copy only not set values */
+ std::copy_if(other.values.begin(), other.values.end(), std::back_inserter(values),
+ [&bits](const auto &elt) -> bool {
+ return (bits & (1 << static_cast<int>(elt.value.index()))) == 0;
+ });
+}
+
+void css_rule::merge_values(const css_rule &other)
+{
+ unsigned int bits = 0;
+
+ for (const auto &v: values) {
+ bits |= 1 << v.value.index();
+ }
+
+ /* Copy only not set values */
+ std::copy_if(other.values.begin(), other.values.end(), std::back_inserter(values),
+ [&bits](const auto &elt) -> bool {
+ return (bits & (1 << elt.value.index())) == 0;
+ });
+}
+
+auto css_declarations_block::add_rule(rule_shared_ptr rule) -> bool
+{
+ auto it = rules.find(rule);
+ auto &&remote_prop = rule->get_prop();
+ auto ret = true;
+
+ if (rule->get_values().size() == 0) {
+ /* Ignore rules with no values */
+ return false;
+ }
+
+ if (it != rules.end()) {
+ auto &&local_rule = *it;
+ auto &&local_prop = local_rule->get_prop();
+
+ if (local_prop.flag == css_property_flag::FLAG_IMPORTANT) {
+ if (remote_prop.flag == css_property_flag::FLAG_IMPORTANT) {
+ local_rule->override_values(*rule);
+ }
+ else {
+ /* Override remote not important over local important */
+ local_rule->merge_values(*rule);
+ }
+ }
+ else if (local_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) {
+ if (remote_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) {
+ local_rule->override_values(*rule);
+ }
+ else {
+ /* Override local not important over important */
+ local_rule->merge_values(*rule);
+ }
+ }
+ else {
+ if (remote_prop.flag == css_property_flag::FLAG_IMPORTANT) {
+ /* Override with remote */
+ local_rule->override_values(*rule);
+ }
+ else if (remote_prop.flag == css_property_flag::FLAG_NOT_IMPORTANT) {
+ /* Ignore remote not important over local normal */
+ ret = false;
+ }
+ else {
+ /* Merge both */
+ local_rule->merge_values(*rule);
+ }
+ }
+ }
+ else {
+ rules.insert(std::move(rule));
+ }
+
+ return ret;
+}
+
+}// namespace rspamd::css
+
+namespace rspamd::css {
+
+/* Static functions */
+
+static auto
+allowed_property_value(const css_property &prop, const css_consumed_block &parser_block)
+ -> std::optional<css_value>
+{
+ if (prop.is_color()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::hash_token) {
+ return css_value::maybe_color_from_hex(tok.get_string_or_default(""));
+ }
+ else if (tok.type == css_parser_token::token_type::ident_token) {
+ auto &&ret = css_value::maybe_color_from_string(tok.get_string_or_default(""));
+
+ return ret;
+ }
+ }
+ else if (parser_block.is_function()) {
+ const auto &func = parser_block.get_function_or_invalid();
+
+ auto &&ret = css_value::maybe_color_from_function(func);
+ return ret;
+ }
+ }
+ if (prop.is_dimension()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ return css_value::maybe_dimension_from_number(tok);
+ }
+ }
+ }
+ if (prop.is_display()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ return css_value::maybe_display_from_string(tok.get_string_or_default(""));
+ }
+ }
+ }
+ if (prop.is_visibility()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ return css_value::maybe_display_from_string(tok.get_string_or_default(""));
+ }
+ }
+ }
+ if (prop.is_normal_number()) {
+ if (parser_block.is_token()) {
+ /* A single token */
+ const auto &tok = parser_block.get_token_or_empty();
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ return css_value{tok.get_normal_number_or_default(0)};
+ }
+ }
+ }
+
+ return std::nullopt;
+}
+
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_block_functor)
+ -> css_declarations_block_ptr
+{
+ css_declarations_block_ptr ret;
+ bool can_continue = true;
+ css_property cur_property{css_property_type::PROPERTY_NYI,
+ css_property_flag::FLAG_NORMAL};
+ static const css_property bad_property{css_property_type::PROPERTY_NYI,
+ css_property_flag::FLAG_NORMAL};
+ std::shared_ptr<css_rule> cur_rule;
+
+ enum {
+ parse_property,
+ parse_value,
+ ignore_value, /* For unknown properties */
+ } state = parse_property;
+
+ auto seen_not = false;
+ ret = std::make_shared<css_declarations_block>();
+
+ while (can_continue) {
+ const auto &next_tok = next_block_functor();
+
+ switch (next_tok.tag) {
+ case css_consumed_block::parser_tag_type::css_component:
+ /* Component can be a property or a compound list of values */
+ if (state == parse_property) {
+ cur_property = css_property::from_token(next_tok.get_token_or_empty())
+ .value_or(bad_property);
+
+ if (cur_property.type == css_property_type::PROPERTY_NYI) {
+ state = ignore_value;
+ /* Ignore everything till ; */
+ continue;
+ }
+
+ msg_debug_css("got css property: %s", cur_property.to_string());
+
+ /* We now expect colon block */
+ const auto &expect_colon_block = next_block_functor();
+
+ if (expect_colon_block.tag != css_consumed_block::parser_tag_type::css_component) {
+ state = ignore_value; /* Ignore up to the next rule */
+ }
+ else {
+ const auto &expect_colon_tok = expect_colon_block.get_token_or_empty();
+
+ if (expect_colon_tok.type != css_parser_token::token_type::colon_token) {
+ msg_debug_css("invalid rule, no colon after property");
+ state = ignore_value; /* Ignore up to the next rule */
+ }
+ else {
+ state = parse_value;
+ cur_rule = std::make_shared<css_rule>(cur_property);
+ }
+ }
+ }
+ else if (state == parse_value) {
+ /* Check semicolon */
+ if (next_tok.is_token()) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (parser_tok.type == css_parser_token::token_type::semicolon_token && cur_rule) {
+ ret->add_rule(std::move(cur_rule));
+ state = parse_property;
+ seen_not = false;
+ continue;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::delim_token) {
+ if (parser_tok.get_string_or_default("") == "!") {
+ /* Probably something like !important */
+ seen_not = true;
+ }
+ }
+ else if (parser_tok.type == css_parser_token::token_type::ident_token) {
+ if (parser_tok.get_string_or_default("") == "important") {
+ if (seen_not) {
+ msg_debug_css("add !important flag to property %s",
+ cur_property.to_string());
+ cur_property.flag = css_property_flag::FLAG_NOT_IMPORTANT;
+ }
+ else {
+ msg_debug_css("add important flag to property %s",
+ cur_property.to_string());
+ cur_property.flag = css_property_flag::FLAG_IMPORTANT;
+ }
+
+ seen_not = false;
+
+ continue;
+ }
+ else {
+ seen_not = false;
+ }
+ }
+ }
+
+ auto maybe_value = allowed_property_value(cur_property, next_tok);
+
+ if (maybe_value) {
+ msg_debug_css("added value %s to the property %s",
+ maybe_value.value().debug_str().c_str(),
+ cur_property.to_string());
+ cur_rule->add_value(maybe_value.value());
+ }
+ }
+ else {
+ /* Ignore all till ; */
+ if (next_tok.is_token()) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ state = parse_property;
+ }
+ }
+ }
+ break;
+ case css_consumed_block::parser_tag_type::css_function:
+ if (state == parse_value) {
+ auto maybe_value = allowed_property_value(cur_property, next_tok);
+
+ if (maybe_value && cur_rule) {
+ msg_debug_css("added value %s to the property %s",
+ maybe_value.value().debug_str().c_str(),
+ cur_property.to_string());
+ cur_rule->add_value(maybe_value.value());
+ }
+ }
+ break;
+ case css_consumed_block::parser_tag_type::css_eof_block:
+ if (state == parse_value) {
+ ret->add_rule(std::move(cur_rule));
+ }
+ can_continue = false;
+ break;
+ default:
+ can_continue = false;
+ break;
+ }
+ }
+
+ return ret; /* copy elision */
+}
+
+auto css_declarations_block::merge_block(const css_declarations_block &other, merge_type how) -> void
+{
+ const auto &other_rules = other.get_rules();
+
+
+ for (auto &rule: other_rules) {
+ auto &&found_it = rules.find(rule);
+
+ if (found_it != rules.end()) {
+ /* Duplicate, need to merge */
+ switch (how) {
+ case merge_type::merge_override:
+ /* Override */
+ (*found_it)->override_values(*rule);
+ break;
+ case merge_type::merge_duplicate:
+ /* Merge values */
+ add_rule(rule);
+ break;
+ case merge_type::merge_parent:
+ /* Do not merge parent rule if more specific local one is presented */
+ break;
+ }
+ }
+ else {
+ /* New property, just insert */
+ rules.insert(rule);
+ }
+ }
+}
+
+auto css_declarations_block::compile_to_block(rspamd_mempool_t *pool) const -> rspamd::html::html_block *
+{
+ auto *block = rspamd_mempool_alloc0_type(pool, rspamd::html::html_block);
+ auto opacity = -1;
+ const css_rule *font_rule = nullptr, *background_rule = nullptr;
+
+ for (const auto &rule: rules) {
+ auto prop = rule->get_prop().type;
+ const auto &vals = rule->get_values();
+
+ if (vals.empty()) {
+ continue;
+ }
+
+ switch (prop) {
+ case css_property_type::PROPERTY_VISIBILITY:
+ case css_property_type::PROPERTY_DISPLAY: {
+ auto disp = vals.back().to_display().value_or(css_display_value::DISPLAY_INLINE);
+ block->set_display(disp);
+ break;
+ }
+ case css_property_type::PROPERTY_FONT_SIZE: {
+ auto fs = vals.back().to_dimension();
+ if (fs) {
+ block->set_font_size(fs.value().dim, fs.value().is_percent);
+ }
+ }
+ case css_property_type::PROPERTY_OPACITY: {
+ opacity = vals.back().to_number().value_or(opacity);
+ break;
+ }
+ case css_property_type::PROPERTY_FONT_COLOR:
+ case css_property_type::PROPERTY_COLOR: {
+ auto color = vals.back().to_color();
+ if (color) {
+ block->set_fgcolor(color.value());
+ }
+ break;
+ }
+ case css_property_type::PROPERTY_BGCOLOR: {
+ auto color = vals.back().to_color();
+ if (color) {
+ block->set_bgcolor(color.value());
+ }
+ break;
+ }
+ case css_property_type::PROPERTY_HEIGHT: {
+ auto w = vals.back().to_dimension();
+ if (w) {
+ block->set_width(w.value().dim, w.value().is_percent);
+ }
+ break;
+ }
+ case css_property_type::PROPERTY_WIDTH: {
+ auto h = vals.back().to_dimension();
+ if (h) {
+ block->set_width(h.value().dim, h.value().is_percent);
+ }
+ break;
+ }
+ /* Optional attributes */
+ case css_property_type::PROPERTY_FONT:
+ font_rule = rule.get();
+ break;
+ case css_property_type::PROPERTY_BACKGROUND:
+ background_rule = rule.get();
+ break;
+ default:
+ /* Do nothing for now */
+ break;
+ }
+ }
+
+ /* Optional properties */
+ if (!(block->fg_color_mask) && font_rule) {
+ auto &vals = font_rule->get_values();
+
+ for (const auto &val: vals) {
+ auto maybe_color = val.to_color();
+
+ if (maybe_color) {
+ block->set_fgcolor(maybe_color.value());
+ }
+ }
+ }
+
+ if (!(block->font_mask) && font_rule) {
+ auto &vals = font_rule->get_values();
+
+ for (const auto &val: vals) {
+ auto maybe_dim = val.to_dimension();
+
+ if (maybe_dim) {
+ block->set_font_size(maybe_dim.value().dim, maybe_dim.value().is_percent);
+ }
+ }
+ }
+
+ if (!(block->bg_color_mask) && background_rule) {
+ auto &vals = background_rule->get_values();
+
+ for (const auto &val: vals) {
+ auto maybe_color = val.to_color();
+
+ if (maybe_color) {
+ block->set_bgcolor(maybe_color.value());
+ }
+ }
+ }
+
+ return block;
+}
+
+void css_rule::add_value(const css_value &value)
+{
+ values.push_back(value);
+}
+
+
+TEST_SUITE("css")
+{
+ TEST_CASE("simple css rules")
+ {
+ const std::vector<std::pair<const char *, std::vector<css_property>>> cases{
+ {"font-size:12.0pt;line-height:115%",
+ {css_property(css_property_type::PROPERTY_FONT_SIZE)}},
+ {"font-size:12.0pt;display:none",
+ {css_property(css_property_type::PROPERTY_FONT_SIZE),
+ css_property(css_property_type::PROPERTY_DISPLAY)}}};
+
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "css", 0);
+
+ for (const auto &c: cases) {
+ auto res = process_declaration_tokens(pool,
+ get_rules_parser_functor(pool, c.first));
+
+ CHECK(res.get() != nullptr);
+
+ for (auto i = 0; i < c.second.size(); i++) {
+ CHECK(res->has_property(c.second[i]));
+ }
+ }
+ }
+}
+
+}// namespace rspamd::css \ No newline at end of file
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
new file mode 100644
index 0000000..114b83e
--- /dev/null
+++ b/src/libserver/css/css_rule.hxx
@@ -0,0 +1,153 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef RSPAMD_CSS_RULE_HXX
+#define RSPAMD_CSS_RULE_HXX
+
+#include "css_value.hxx"
+#include "css_property.hxx"
+#include "css_parser.hxx"
+#include "contrib/ankerl/unordered_dense.h"
+#include "libutil/cxx/util.hxx"
+#include "libutil/cxx/hash_util.hxx"
+#include <vector>
+#include <memory>
+
+namespace rspamd::html {
+/* Forward declaration */
+struct html_block;
+}// namespace rspamd::html
+
+namespace rspamd::css {
+
+class css_rule {
+ css_property prop;
+ using css_values_vec = std::vector<css_value>;
+ css_values_vec values;
+
+public:
+ /* We must create css rule explicitly from a property and values */
+ css_rule() = delete;
+
+ css_rule(const css_rule &other) = delete;
+
+ /* Constructors */
+ css_rule(css_rule &&other) noexcept = default;
+
+ explicit css_rule(css_property &&prop, css_values_vec &&values) noexcept
+ : prop(prop), values(std::forward<css_values_vec>(values))
+ {
+ }
+
+ explicit css_rule(const css_property &prop) noexcept
+ : prop(prop), values{}
+ {
+ }
+
+ /* Methods */
+ /* Comparison is special, as we care merely about property, not the values */
+ auto operator==(const css_rule &other) const
+ {
+ return prop == other.prop;
+ }
+
+ constexpr const css_values_vec &get_values(void) const
+ {
+ return values;
+ }
+ constexpr const css_property &get_prop(void) const
+ {
+ return prop;
+ }
+
+ /* Import values from another rules according to the importance */
+ void override_values(const css_rule &other);
+ void merge_values(const css_rule &other);
+ void add_value(const css_value &value);
+};
+
+}// namespace rspamd::css
+
+/* Make rules hashable by property */
+namespace std {
+template<>
+class hash<rspamd::css::css_rule> {
+public:
+ using is_avalanching = void;
+ constexpr auto operator()(const rspamd::css::css_rule &rule) const -> auto
+ {
+ return hash<rspamd::css::css_property>()(rule.get_prop());
+ }
+};
+
+}// namespace std
+
+namespace rspamd::css {
+
+/**
+ * Class that is designed to hold css declaration (a set of rules)
+ */
+class css_declarations_block {
+public:
+ using rule_shared_ptr = std::shared_ptr<css_rule>;
+ using rule_shared_hash = smart_ptr_hash<css_rule>;
+ using rule_shared_eq = smart_ptr_equal<css_rule>;
+ enum class merge_type {
+ merge_duplicate,
+ merge_parent,
+ merge_override
+ };
+
+ css_declarations_block() = default;
+ auto add_rule(rule_shared_ptr rule) -> bool;
+ auto merge_block(const css_declarations_block &other,
+ merge_type how = merge_type::merge_duplicate) -> void;
+ auto get_rules(void) const -> const auto &
+ {
+ return rules;
+ }
+
+ /**
+ * Returns if a declaration block has some property
+ * @param prop
+ * @return
+ */
+ auto has_property(const css_property &prop) const -> bool
+ {
+ return (rules.find(css_rule{prop}) != rules.end());
+ }
+
+ /**
+ * Compile CSS declaration to the html block
+ * @param pool used to carry memory required for html_block
+ * @return html block structure
+ */
+ auto compile_to_block(rspamd_mempool_t *pool) const -> rspamd::html::html_block *;
+
+private:
+ ankerl::unordered_dense::set<rule_shared_ptr, rule_shared_hash, rule_shared_eq> rules;
+};
+
+using css_declarations_block_ptr = std::shared_ptr<css_declarations_block>;
+
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_token_functor)
+ -> css_declarations_block_ptr;
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_RULE_HXX \ No newline at end of file
diff --git a/src/libserver/css/css_rule_parser.rl b/src/libserver/css/css_rule_parser.rl
new file mode 100644
index 0000000..e3b1876
--- /dev/null
+++ b/src/libserver/css/css_rule_parser.rl
@@ -0,0 +1,27 @@
+%%{
+ machine css_parser;
+ alphtype unsigned char;
+ include css_syntax "css_syntax.rl";
+
+ main := declaration;
+}%%
+
+%% write data;
+
+#include <cstddef>
+
+namespace rspamd::css {
+
+int
+foo (const unsigned char *data, std::size_t len)
+{
+ const unsigned char *p = data, *pe = data + len, *eof;
+ int cs;
+
+ %% write init;
+ %% write exec;
+
+ return cs;
+}
+
+} \ No newline at end of file
diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx
new file mode 100644
index 0000000..a62ffff
--- /dev/null
+++ b/src/libserver/css/css_selector.cxx
@@ -0,0 +1,226 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_selector.hxx"
+#include "css.hxx"
+#include "libserver/html/html.hxx"
+#include "fmt/core.h"
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+namespace rspamd::css {
+
+auto process_selector_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_token_functor)
+ -> selectors_vec
+{
+ selectors_vec ret;
+ bool can_continue = true;
+ enum class selector_process_state {
+ selector_parse_start = 0,
+ selector_expect_ident,
+ selector_ident_consumed,
+ selector_ignore_attribute,
+ selector_ignore_function,
+ selector_ignore_combination
+ } state = selector_process_state::selector_parse_start;
+ std::unique_ptr<css_selector> cur_selector;
+
+
+ while (can_continue) {
+ const auto &next_tok = next_token_functor();
+
+ if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (state == selector_process_state::selector_parse_start) {
+ /*
+ * At the beginning of the parsing we can expect either
+ * delim or an ident, everything else is discarded for now
+ */
+ msg_debug_css("start consume selector");
+
+ switch (parser_tok.type) {
+ case css_parser_token::token_type::delim_token: {
+ auto delim_c = parser_tok.get_delim();
+
+ if (delim_c == '.') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_CLASS);
+ state = selector_process_state::selector_expect_ident;
+ }
+ else if (delim_c == '#') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ID);
+ state = selector_process_state::selector_expect_ident;
+ }
+ else if (delim_c == '*') {
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ALL);
+ state = selector_process_state::selector_ident_consumed;
+ }
+ break;
+ }
+ case css_parser_token::token_type::ident_token: {
+ auto tag_id = html::html_tag_by_name(parser_tok.get_string_or_default(""));
+
+ if (tag_id) {
+ cur_selector = std::make_unique<css_selector>(tag_id.value());
+ }
+ state = selector_process_state::selector_ident_consumed;
+ break;
+ }
+ case css_parser_token::token_type::hash_token:
+ cur_selector = std::make_unique<css_selector>(
+ css_selector::selector_type::SELECTOR_ID);
+ cur_selector->value =
+ parser_tok.get_string_or_default("");
+ state = selector_process_state::selector_ident_consumed;
+ break;
+ default:
+ msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
+ next_tok.token_type_str());
+ can_continue = false;
+ break;
+ }
+ }
+ else if (state == selector_process_state::selector_expect_ident) {
+ /*
+ * We got something like a selector start, so we expect
+ * a plain ident
+ */
+ if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
+ cur_selector->value = parser_tok.get_string_or_default("");
+ state = selector_process_state::selector_ident_consumed;
+ }
+ else {
+ msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
+ next_tok.token_type_str());
+ can_continue = false;
+ }
+ }
+ else if (state == selector_process_state::selector_ident_consumed) {
+ if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
+ /* Got full selector, attach it to the vector and go further */
+ msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+ ret.push_back(std::move(cur_selector));
+ state = selector_process_state::selector_parse_start;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ /* TODO: implement adjustments */
+ state = selector_process_state::selector_ignore_function;
+ }
+ else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
+ /* TODO: implement attributes checks */
+ state = selector_process_state::selector_ignore_attribute;
+ }
+ else {
+ /* TODO: implement selectors combinations */
+ state = selector_process_state::selector_ignore_combination;
+ }
+ }
+ else {
+ /* Ignore state; ignore all till ',' token or eof token */
+ if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
+ /* Got full selector, attach it to the vector and go further */
+ ret.push_back(std::move(cur_selector));
+ state = selector_process_state::selector_parse_start;
+ }
+ else {
+ auto debug_str = parser_tok.get_string_or_default("");
+ msg_debug_css("ignore token %*s", (int) debug_str.size(),
+ debug_str.data());
+ }
+ }
+ }
+ else {
+ /* End of parsing */
+ if (state == selector_process_state::selector_ident_consumed && cur_selector) {
+ msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
+ ret.push_back(std::move(cur_selector));
+ }
+ else {
+ msg_debug_css("not attached selector, state: %d", static_cast<int>(state));
+ }
+ can_continue = false;
+ }
+ }
+
+ return ret; /* copy elision */
+}
+
+auto css_selector::debug_str() const -> std::string
+{
+ std::string ret;
+
+ if (type == selector_type::SELECTOR_ID) {
+ ret += "#";
+ }
+ else if (type == selector_type::SELECTOR_CLASS) {
+ ret += ".";
+ }
+ else if (type == selector_type::SELECTOR_ALL) {
+ ret = "*";
+
+ return ret;
+ }
+
+ std::visit([&](auto arg) -> void {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, tag_id_t>) {
+ ret += fmt::format("tag: {}", static_cast<int>(arg));
+ }
+ else {
+ ret += arg;
+ }
+ },
+ value);
+
+ return ret;
+}
+
+TEST_SUITE("css")
+{
+ TEST_CASE("simple css selectors")
+ {
+ const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{
+ {"em", {css_selector::selector_type::SELECTOR_TAG}},
+ {"*", {css_selector::selector_type::SELECTOR_ALL}},
+ {".class", {css_selector::selector_type::SELECTOR_CLASS}},
+ {"#id", {css_selector::selector_type::SELECTOR_ID}},
+ {"em,.class,#id", {css_selector::selector_type::SELECTOR_TAG, css_selector::selector_type::SELECTOR_CLASS, css_selector::selector_type::SELECTOR_ID}},
+ };
+
+ auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
+ "css", 0);
+
+ for (const auto &c: cases) {
+ auto res = process_selector_tokens(pool,
+ get_selectors_parser_functor(pool, c.first));
+
+ CHECK(c.second.size() == res.size());
+
+ for (auto i = 0; i < c.second.size(); i++) {
+ CHECK(res[i]->type == c.second[i]);
+ }
+ }
+
+ rspamd_mempool_delete(pool);
+ }
+}
+
+}// namespace rspamd::css
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
new file mode 100644
index 0000000..65b185a
--- /dev/null
+++ b/src/libserver/css/css_selector.hxx
@@ -0,0 +1,134 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_SELECTOR_HXX
+#define RSPAMD_CSS_SELECTOR_HXX
+
+#include <variant>
+#include <string>
+#include <optional>
+#include <vector>
+#include <memory>
+
+#include "function2/function2.hpp"
+#include "parse_error.hxx"
+#include "css_parser.hxx"
+#include "libserver/html/html_tags.h"
+#include "libcryptobox/cryptobox.h"
+
+namespace rspamd::css {
+
+/*
+ * Holds a value for css selector, internal is handled by variant
+ */
+struct css_selector {
+ enum class selector_type {
+ SELECTOR_TAG, /* e.g. tr, for this value we use tag_id_t */
+ SELECTOR_CLASS, /* generic class, e.g. .class */
+ SELECTOR_ID, /* e.g. #id */
+ SELECTOR_ALL /* * selector */
+ };
+
+ selector_type type;
+ std::variant<tag_id_t, std::string_view> value;
+
+ /* Conditions for the css selector */
+ /* Dependency on attributes */
+ struct css_attribute_condition {
+ std::string_view attribute;
+ std::string_view op = "";
+ std::string_view value = "";
+ };
+
+ /* General dependency chain */
+ using css_selector_ptr = std::unique_ptr<css_selector>;
+ using css_selector_dep = std::variant<css_attribute_condition, css_selector_ptr>;
+ std::vector<css_selector_dep> dependencies;
+
+ auto to_tag(void) const -> std::optional<tag_id_t>
+ {
+ if (type == selector_type::SELECTOR_TAG) {
+ return std::get<tag_id_t>(value);
+ }
+ return std::nullopt;
+ }
+
+ auto to_string(void) const -> std::optional<const std::string_view>
+ {
+ if (type != selector_type::SELECTOR_TAG) {
+ return std::string_view(std::get<std::string_view>(value));
+ }
+ return std::nullopt;
+ };
+
+ explicit css_selector(selector_type t)
+ : type(t)
+ {
+ }
+ explicit css_selector(tag_id_t t)
+ : type(selector_type::SELECTOR_TAG)
+ {
+ value = t;
+ }
+ explicit css_selector(const std::string_view &st, selector_type t = selector_type::SELECTOR_ID)
+ : type(t)
+ {
+ value = st;
+ }
+
+ auto operator==(const css_selector &other) const -> bool
+ {
+ return type == other.type && value == other.value;
+ }
+
+ auto debug_str(void) const -> std::string;
+};
+
+
+using selectors_vec = std::vector<std::unique_ptr<css_selector>>;
+
+/*
+ * Consume selectors token and split them to the list of selectors
+ */
+auto process_selector_tokens(rspamd_mempool_t *pool,
+ blocks_gen_functor &&next_token_functor)
+ -> selectors_vec;
+
+}// namespace rspamd::css
+
+/* Selectors hashing */
+namespace std {
+template<>
+class hash<rspamd::css::css_selector> {
+public:
+ using is_avalanching = void;
+ auto operator()(const rspamd::css::css_selector &sel) const -> std::size_t
+ {
+ if (sel.type == rspamd::css::css_selector::selector_type::SELECTOR_TAG) {
+ return static_cast<std::size_t>(std::get<tag_id_t>(sel.value));
+ }
+ else {
+ const auto &sv = std::get<std::string_view>(sel.value);
+
+ return rspamd_cryptobox_fast_hash(sv.data(), sv.size(), 0xdeadbabe);
+ }
+ }
+};
+}// namespace std
+
+#endif//RSPAMD_CSS_SELECTOR_HXX
diff --git a/src/libserver/css/css_selector_parser.rl b/src/libserver/css/css_selector_parser.rl
new file mode 100644
index 0000000..f5ae936
--- /dev/null
+++ b/src/libserver/css/css_selector_parser.rl
@@ -0,0 +1,27 @@
+%%{
+ machine css_parser;
+ alphtype unsigned char;
+ include css_syntax "css_syntax.rl";
+
+ main := selectors_group;
+}%%
+
+%% write data;
+
+#include <cstddef>
+
+namespace rspamd::css {
+
+int
+parse_css_selector (const unsigned char *data, std::size_t len)
+{
+ const unsigned char *p = data, *pe = data + len, *eof;
+ int cs;
+
+ %% write init;
+ %% write exec;
+
+ return cs;
+}
+
+} \ No newline at end of file
diff --git a/src/libserver/css/css_style.hxx b/src/libserver/css/css_style.hxx
new file mode 100644
index 0000000..429e58f
--- /dev/null
+++ b/src/libserver/css/css_style.hxx
@@ -0,0 +1,66 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_STYLE_HXX
+#define RSPAMD_CSS_STYLE_HXX
+
+#include <memory>
+#include <vector>
+#include "css_rule.hxx"
+#include "css_selector.hxx"
+
+namespace rspamd::css {
+
+/*
+ * Full CSS style representation
+ */
+class css_style {
+public:
+ /* Make class trivial */
+ css_style(const css_style &other) = default;
+
+ css_style(const std::shared_ptr<css_style> &_parent)
+ : parent(_parent)
+ {
+ propagate_from_parent();
+ }
+ css_style(const std::shared_ptr<css_style> &_parent,
+ const std::vector<std::shared_ptr<css_selector>> &_selectors)
+ : parent(_parent)
+ {
+ selectors.reserve(_selectors.size());
+
+ for (const auto &sel_ptr: _selectors) {
+ selectors.emplace_back(sel_ptr);
+ }
+
+ propagate_from_parent();
+ }
+
+private:
+ std::vector<std::weak_ptr<css_selector>> selectors;
+ std::weak_ptr<css_style> parent;
+ std::vector<css_rule> rules;
+
+private:
+ void propagate_from_parent(void); /* Construct full style using parent */
+};
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_STYLE_HXX
diff --git a/src/libserver/css/css_syntax.rl b/src/libserver/css/css_syntax.rl
new file mode 100644
index 0000000..93da44b
--- /dev/null
+++ b/src/libserver/css/css_syntax.rl
@@ -0,0 +1,110 @@
+%%{
+ # CSS3 EBNF derived
+ machine css_syntax;
+
+ # Primitive Atoms
+ COMMENT = (
+ '/*' ( any )* :>> '*/'
+ );
+ QUOTED_STRING = ('"' ( [^"\\] | /\\./ )* "'");
+ BARE_URL_CHARS = ((0x21
+ | 0x23..0x26
+ | 0x2A..0xFF)+);
+ BARE_URL = BARE_URL_CHARS;
+ URL = 'url(' ( QUOTED_STRING | space* BARE_URL space* ) ')';
+ nonascii = [^0x00-0x7F];
+ nmstart = ([_a-zA-Z] | nonascii);
+ nmchar = ([_a-zA-Z0-9] | 0x2D | nonascii);
+ name = nmchar+;
+ num = ([0-9]+ | ([0-9]* '.' [0-9]+));
+ CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n");
+ IDENT = ([\-]? nmstart nmchar*);
+ ATTR = 'attr(' IDENT ')';
+
+ DIMENSION = '-'? num space? ( 'ch' | 'cm' | 'em' | 'ex' | 'fr' | 'in' | 'mm' | 'pc' | 'pt' | 'px' | 'Q' | 'rem' | 'vh' | 'vmax' | 'vmin' | 'vw' | 'dpi' );
+ NUMBER = '-'? num;
+ HASH = '#' name;
+ HEX = '#' [0-9a-fA-F]{1,6};
+ PERCENTAGE = '-'? num '%';
+ INCLUDES = '~=';
+ DASHMATCH = '|=';
+ PREFIXMATCH = '^=';
+ SUFFIXMATCH = '$=';
+ SUBSTRINGMATCH = '*=';
+ PLUS = '+';
+ GREATER = '>';
+ COMMA = ',';
+ TILDE = '~';
+ S = space;
+
+ # Property name
+ property = ( QUOTED_STRING | IDENT );
+
+ # Values
+ important = space* '!' space* 'important';
+ expression = ( ( '+' | PERCENTAGE | URL | ATTR | HEX | '-' | DIMENSION | NUMBER | QUOTED_STRING | IDENT | ',') S* )+;
+ functional_pseudo = (IDENT - ('attr'|'url')) '(' space* expression? ')';
+ value = ( URL | ATTR | PLUS | HEX | PERCENTAGE | '-' | DIMENSION | NUMBER | QUOTED_STRING | IDENT | functional_pseudo);
+ values = value (space value | '/' value )* ( space* ',' space* value (space value | '/' value )* )* important?;
+
+ # Declaration definition
+ declaration = (property space? ':' (property ':')* space? values);
+
+ # Selectors
+ class = '.' IDENT;
+ element_name = IDENT;
+ namespace_prefix = ( IDENT | '*' )? '|';
+ type_selector = namespace_prefix? element_name;
+ universal = namespace_prefix? '*';
+ attrib = '[' space* namespace_prefix? IDENT space* ( ( PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | '=' | INCLUDES | DASHMATCH ) space* ( IDENT | QUOTED_STRING ) space* )? ']';
+ pseudo = ':' ':'? ( IDENT | functional_pseudo );
+ atrule = '@' IDENT;
+ mediaquery_selector = '(' declaration ')';
+ negation_arg = type_selector
+ | universal
+ | HASH
+ | class
+ | attrib
+ | pseudo;
+ negation = 'NOT'|'not' space* negation_arg space* ')';
+ # Haha, so simple...
+ # there should be also mediaquery_selector but it makes grammar too large, so rip it off
+ simple_selector_sequence = ( type_selector | universal ) ( HASH | class | attrib | pseudo | negation | atrule )*
+ | ( HASH | class | attrib | pseudo | negation | atrule )+;
+ combinator = space* PLUS space*
+ | space* GREATER space*
+ | space* TILDE space*
+ | space+;
+ # Combine simple stuff and obtain just... an ordinary selector, bingo
+ selector = simple_selector_sequence ( combinator simple_selector_sequence )*;
+ # Multiple beasts
+ selectors_group = selector ( COMMENT? ',' space* selector )*;
+
+ # Rules
+ # This is mostly used stuff
+ rule = selectors_group space? "{" space*
+ (COMMENT? space* declaration ( space? ";" space? declaration?)* ";"? space?)* COMMENT* space* '}';
+ query_declaration = rule;
+
+ # Areas used in css
+ arearule = '@'('bottom-left'|'bottom-right'|'top-left'|'top-right');
+ areaquery = arearule space? '{' space* (COMMENT? space* declaration ( S? ';' S? declaration?)* ';'? space?)* COMMENT* space* '}';
+ # Printed media stuff, useless but we have to parse it :(
+ printcssrule = '@media print';
+ pagearea = ':'('left'|'right');
+ pagerule = '@page' space? pagearea?;
+ pagequery = pagerule space? '{' space* (areaquery| (COMMENT? space* declaration ( space? ';' space? declaration?)* ';'? S?)*) COMMENT* space* '}';
+ printcssquery = printcssrule S? '{' ( S? COMMENT* S? (pagequery| COMMENT|query_declaration) S*)* S? '}';
+ # Something that defines media
+ conditions = ('and'|'screen'|'or'|'only'|'not'|'amzn-mobi'|'amzn-kf8'|'amzn-mobi7'|',');
+ mediarule = '@media' space conditions ( space? conditions| space? mediaquery_selector )*;
+ mediaquery = mediarule space? '{' ( space? COMMENT* query_declaration)* S? '}';
+
+ simple_atrule = ("@charset"|"@namespace") space+ QUOTED_STRING space* ";";
+
+ import_rule = "@import" space+ ( QUOTED_STRING | URL ) space* ";";
+
+ # Final css definition
+ css_style = space* ( ( rule | simple_atrule | import_rule | mediaquery | printcssquery | COMMENT) space* )*;
+
+}%% \ No newline at end of file
diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx
new file mode 100644
index 0000000..6d3f41e
--- /dev/null
+++ b/src/libserver/css/css_tokeniser.cxx
@@ -0,0 +1,836 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_tokeniser.hxx"
+#include "css_util.hxx"
+#include "css.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include <string>
+#include <cmath>
+
+namespace rspamd::css {
+
+/* Helpers to create tokens */
+
+/*
+ * This helper is intended to create tokens either with a tag and value
+ * or with just a tag.
+ */
+template<css_parser_token::token_type T, class Arg>
+auto make_token(const Arg &arg) -> css_parser_token;
+
+template<>
+auto make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::string_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::ident_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::ident_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::function_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::function_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::url_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::url_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::whitespace_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::delim_token, char>(const char &c)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::delim_token, c};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::number_token, float>(const float &d)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::number_token, d};
+}
+
+/*
+ * Generic tokens with no value (non-terminals)
+ */
+template<css_parser_token::token_type T>
+auto make_token(void) -> css_parser_token
+{
+ return css_parser_token{T, css_parser_token_placeholder()};
+}
+
+static constexpr inline auto is_plain_ident_start(char c) -> bool
+{
+ if ((c & 0x80) || g_ascii_isalpha(c) || c == '_') {
+ return true;
+ }
+
+ return false;
+};
+
+static constexpr inline auto is_plain_ident(char c) -> bool
+{
+ if (is_plain_ident_start(c) || c == '-' || g_ascii_isdigit(c)) {
+ return true;
+ }
+
+ return false;
+};
+
+struct css_dimension_data {
+ css_parser_token::dim_type dtype;
+ double mult;
+};
+
+/*
+ * Maps from css dimensions to the multipliers that look reasonable in email
+ */
+constexpr const auto max_dims = static_cast<int>(css_parser_token::dim_type::dim_max);
+constexpr frozen::unordered_map<frozen::string, css_dimension_data, max_dims> dimensions_map{
+ {"px", {css_parser_token::dim_type::dim_px, 1.0}},
+ /* EM/REM are 16 px, so multiply and round */
+ {"em", {css_parser_token::dim_type::dim_em, 16.0}},
+ {"rem", {css_parser_token::dim_type::dim_rem, 16.0}},
+ /*
+ * Represents the x-height of the element's font.
+ * On fonts with the "x" letter, this is generally the height
+ * of lowercase letters in the font; 1ex = 0.5em in many fonts.
+ */
+ {"ex", {css_parser_token::dim_type::dim_ex, 8.0}},
+ {"wv", {css_parser_token::dim_type::dim_wv, 8.0}},
+ {"wh", {css_parser_token::dim_type::dim_wh, 6.0}},
+ {"vmax", {css_parser_token::dim_type::dim_vmax, 8.0}},
+ {"vmin", {css_parser_token::dim_type::dim_vmin, 6.0}},
+ /* One point. 1pt = 1/72nd of 1in */
+ {"pt", {css_parser_token::dim_type::dim_pt, 96.0 / 72.0}},
+ /* 96px/2.54 */
+ {"cm", {css_parser_token::dim_type::dim_cm, 96.0 / 2.54}},
+ {"mm", {css_parser_token::dim_type::dim_mm, 9.60 / 2.54}},
+ {"in", {css_parser_token::dim_type::dim_in, 96.0}},
+ /* 1pc = 12pt = 1/6th of 1in. */
+ {"pc", {css_parser_token::dim_type::dim_pc, 96.0 / 6.0}}};
+
+auto css_parser_token::adjust_dim(const css_parser_token &dim_token) -> bool
+{
+ if (!std::holds_alternative<float>(value) ||
+ !std::holds_alternative<std::string_view>(dim_token.value)) {
+ /* Invalid tokens */
+ return false;
+ }
+
+ auto num = std::get<float>(value);
+ auto sv = std::get<std::string_view>(dim_token.value);
+
+ auto dim_found = find_map(dimensions_map, sv);
+
+ if (dim_found) {
+ auto dim_elt = dim_found.value().get();
+ dimension_type = dim_elt.dtype;
+ flags |= css_parser_token::number_dimension;
+ num *= dim_elt.mult;
+ }
+ else {
+ flags |= css_parser_token::flag_bad_dimension;
+
+ return false;
+ }
+
+ value = num;
+
+ return true;
+}
+
+
+/*
+ * Consume functions: return a token and advance lexer offset
+ */
+auto css_tokeniser::consume_ident(bool allow_number) -> struct css_parser_token {
+ auto i = offset;
+ auto need_escape = false;
+ auto allow_middle_minus = false;
+
+ auto maybe_escape_sv = [&](auto cur_pos, auto tok_type) -> auto {
+ if (need_escape) {
+ auto escaped = rspamd::css::unescape_css(pool, {&input[offset],
+ cur_pos - offset});
+ offset = cur_pos;
+
+ return css_parser_token{tok_type, escaped};
+ }
+
+ auto result = std::string_view{&input[offset], cur_pos - offset};
+ offset = cur_pos;
+
+ return css_parser_token{tok_type, result};
+ };
+
+ /* Ident token can start from `-` or `--` */
+ if (input[i] == '-') {
+ i++;
+
+ if (i < input.size() && input[i] == '-') {
+ i++;
+ allow_middle_minus = true;
+ }
+ }
+
+ while (i < input.size()) {
+ auto c = input[i];
+
+ auto is_plain_c = (allow_number || allow_middle_minus) ? is_plain_ident(c) : is_plain_ident_start(c);
+ if (!is_plain_c) {
+ if (c == '\\' && i + 1 < input.size()) {
+ /* Escape token */
+ need_escape = true;
+ auto nhex = 0;
+
+ /* Need to find an escape end */
+ do {
+ c = input[++i];
+ if (g_ascii_isxdigit(c)) {
+ nhex++;
+
+ if (nhex > 6) {
+ /* End of the escape */
+ break;
+ }
+ }
+ else if (nhex > 0 && c == ' ') {
+ /* \[hex]{1,6} */
+ i++; /* Skip one space */
+ break;
+ }
+ else {
+ /* Single \ + char */
+ break;
+ }
+ } while (i < input.size());
+ }
+ else if (c == '(') {
+ /* Function or url token */
+ auto j = i + 1;
+
+ while (j < input.size() && g_ascii_isspace(input[j])) {
+ j++;
+ }
+
+ if (input.size() - offset > 3 && input.substr(offset, 3) == "url") {
+ if (j < input.size() && (input[j] == '"' || input[j] == '\'')) {
+ /* Function token */
+ auto ret = maybe_escape_sv(i,
+ css_parser_token::token_type::function_token);
+ return ret;
+ }
+ else {
+ /* Consume URL token */
+ while (j < input.size() && input[j] != ')') {
+ j++;
+ }
+
+ if (j < input.size() && input[j] == ')') {
+ /* Valid url token */
+ auto ret = maybe_escape_sv(j + 1,
+ css_parser_token::token_type::url_token);
+ return ret;
+ }
+ else {
+ /* Incomplete url token */
+ auto ret = maybe_escape_sv(j,
+ css_parser_token::token_type::url_token);
+
+ ret.flags |= css_parser_token::flag_bad_string;
+ return ret;
+ }
+ }
+ }
+ else {
+ auto ret = maybe_escape_sv(i,
+ css_parser_token::token_type::function_token);
+ return ret;
+ }
+ }
+ else if (c == '-' && allow_middle_minus) {
+ i++;
+ continue;
+ }
+ else {
+ break; /* Not an ident token */
+ }
+ } /* !plain ident */
+ else {
+ allow_middle_minus = true;
+ }
+
+ i++;
+ }
+
+ return maybe_escape_sv(i, css_parser_token::token_type::ident_token);
+}
+
+auto
+css_tokeniser::consume_number() -> struct css_parser_token {
+ auto i = offset;
+ auto seen_dot = false, seen_exp = false;
+
+ if (input[i] == '-' || input[i] == '+') {
+ i++;
+ }
+ if (input[i] == '.' && i < input.size()) {
+ seen_dot = true;
+ i++;
+ }
+
+ while (i < input.size()) {
+ auto c = input[i];
+
+ if (!g_ascii_isdigit(c)) {
+ if (c == '.') {
+ if (!seen_dot) {
+ seen_dot = true;
+ }
+ else {
+ break;
+ }
+ }
+ else if (c == 'e' || c == 'E') {
+ if (!seen_exp) {
+ seen_exp = true;
+ seen_dot = true; /* dots are not allowed after e */
+
+ if (i + 1 < input.size()) {
+ auto next_c = input[i + 1];
+ if (next_c == '+' || next_c == '-') {
+ i++;
+ }
+ else if (!g_ascii_isdigit(next_c)) {
+ /* Not an exponent */
+ break;
+ }
+ }
+ else {
+ /* Not an exponent */
+ break;
+ }
+ }
+ else {
+ break;
+ }
+ }
+ else {
+ break;
+ }
+ }
+
+ i++;
+ }
+
+ if (i > offset) {
+ /* I wish it was supported properly */
+ //auto conv_res = std::from_chars(&input[offset], &input[i], num);
+ char numbuf[128], *endptr = nullptr;
+ rspamd_strlcpy(numbuf, &input[offset], MIN(i - offset + 1, sizeof(numbuf)));
+ auto num = g_ascii_strtod(numbuf, &endptr);
+ offset = i;
+
+ if (fabs(num) >= G_MAXFLOAT || std::isnan(num)) {
+ msg_debug_css("invalid number: %s", numbuf);
+ return make_token<css_parser_token::token_type::delim_token>(input[i - 1]);
+ }
+ else {
+
+ auto ret = make_token<css_parser_token::token_type::number_token>(static_cast<float>(num));
+
+ if (i < input.size()) {
+ if (input[i] == '%') {
+ ret.flags |= css_parser_token::number_percent;
+ i++;
+
+ offset = i;
+ }
+ else if (is_plain_ident_start(input[i])) {
+ auto dim_token = consume_ident();
+
+ if (dim_token.type == css_parser_token::token_type::ident_token) {
+ if (!ret.adjust_dim(dim_token)) {
+ auto sv = std::get<std::string_view>(dim_token.value);
+ msg_debug_css("cannot apply dimension from the token %*s; number value = %.1f",
+ (int) sv.size(), sv.begin(), num);
+ /* Unconsume ident */
+ offset = i;
+ }
+ }
+ else {
+ /* We have no option but to uncosume ident token in this case */
+ msg_debug_css("got invalid ident like token after number, unconsume it");
+ }
+ }
+ else {
+ /* Plain number, nothing to do */
+ }
+ }
+
+ return ret;
+ }
+ }
+ else {
+ msg_err_css("internal error: invalid number, empty token");
+ i++;
+ }
+
+ offset = i;
+ /* Should not happen */
+ return make_token<css_parser_token::token_type::delim_token>(input[i - 1]);
+}
+
+/*
+ * Main routine to produce lexer tokens
+ */
+auto
+css_tokeniser::next_token(void) -> struct css_parser_token {
+ /* Check pushback queue */
+ if (!backlog.empty()) {
+ auto tok = backlog.front();
+ backlog.pop_front();
+
+ return tok;
+ }
+ /* Helpers */
+
+ /*
+ * This lambda eats comment handling nested comments;
+ * offset is set to the next character after a comment (or eof)
+ * Nothing is returned
+ */
+ auto consume_comment = [this]() {
+ auto i = offset;
+ auto nested = 0;
+
+ if (input.empty()) {
+ /* Nothing to consume */
+ return;
+ }
+
+ /* We handle nested comments just because they can exist... */
+ while (i < input.size() - 1) {
+ auto c = input[i];
+ if (c == '*' && input[i + 1] == '/') {
+ if (nested == 0) {
+ offset = i + 2;
+ return;
+ }
+ else {
+ nested--;
+ i += 2;
+ continue;
+ }
+ }
+ else if (c == '/' && input[i + 1] == '*') {
+ nested++;
+ i += 2;
+ continue;
+ }
+
+ i++;
+ }
+
+ offset = i;
+ };
+
+ /*
+ * Consume quoted string, returns a string_view over a string, offset
+ * is set one character after the string. Css unescaping is done automatically
+ * Accepts a quote char to find end of string
+ */
+ auto consume_string = [this](auto quote_char) -> auto {
+ auto i = offset;
+ bool need_unescape = false;
+
+ while (i < input.size()) {
+ auto c = input[i];
+
+ if (c == '\\') {
+ if (i + 1 < input.size()) {
+ need_unescape = true;
+ }
+ else {
+ /* \ at the end -> ignore */
+ }
+ }
+ else if (c == quote_char) {
+ /* End of string */
+ std::string_view res{&input[offset], i - offset};
+
+ if (need_unescape) {
+ res = rspamd::css::unescape_css(pool, res);
+ }
+
+ offset = i + 1;
+
+ return res;
+ }
+ else if (c == '\n') {
+ /* Should be a error, but we ignore it for now */
+ }
+
+ i++;
+ }
+
+ /* EOF with no quote character, consider it fine */
+ std::string_view res{&input[offset], i - offset};
+
+ if (need_unescape) {
+ res = rspamd::css::unescape_css(pool, res);
+ }
+
+ offset = i;
+
+ return res;
+ };
+
+ /* Main tokenisation loop */
+ for (auto i = offset; i < input.size(); ++i) {
+ auto c = input[i];
+
+ switch (c) {
+ case '/':
+ if (i + 1 < input.size() && input[i + 1] == '*') {
+ offset = i + 2;
+ consume_comment(); /* Consume comment and go forward */
+ return next_token(); /* Tail call */
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f': {
+ /* Consume as much space as we can */
+ while (i < input.size() && g_ascii_isspace(input[i])) {
+ i++;
+ }
+
+ auto ret = make_token<css_parser_token::token_type::whitespace_token>(
+ std::string_view(&input[offset], i - offset));
+ offset = i;
+ return ret;
+ }
+ case '"':
+ case '\'':
+ offset = i + 1;
+ if (offset < input.size()) {
+ return make_token<css_parser_token::token_type::string_token>(consume_string(c));
+ }
+ else {
+ /* Unpaired quote at the end of the rule */
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ case '(':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::obrace_token>();
+ case ')':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::ebrace_token>();
+ case '[':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::osqbrace_token>();
+ case ']':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::esqbrace_token>();
+ case '{':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::ocurlbrace_token>();
+ case '}':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::ecurlbrace_token>();
+ case ',':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::comma_token>();
+ case ';':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::semicolon_token>();
+ case ':':
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::colon_token>();
+ case '<':
+ /* Maybe an xml like comment */
+ if (i + 3 < input.size() && input[i + 1] == '!' && input[i + 2] == '-' && input[i + 3] == '-') {
+ offset += 3;
+
+ return make_token<css_parser_token::token_type::cdo_token>();
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case '-':
+ if (i + 1 < input.size()) {
+ auto next_c = input[i + 1];
+
+ if (g_ascii_isdigit(next_c)) {
+ /* negative number */
+ return consume_number();
+ }
+ else if (next_c == '-') {
+ if (i + 2 < input.size() && input[i + 2] == '>') {
+ /* XML like comment */
+ offset += 3;
+
+ return make_token<css_parser_token::token_type::cdc_token>();
+ }
+ }
+ }
+ /* No other options, a delimiter - */
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+
+ break;
+ case '+':
+ case '.':
+ /* Maybe number */
+ if (i + 1 < input.size()) {
+ auto next_c = input[i + 1];
+
+ if (g_ascii_isdigit(next_c)) {
+ /* Numeric token */
+ return consume_number();
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ }
+ /* No other options, a delimiter - */
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+
+ break;
+ case '\\':
+ if (i + 1 < input.size()) {
+ if (input[i + 1] == '\n' || input[i + 1] == '\r') {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ else {
+ /* Valid escape, assume ident */
+ return consume_ident();
+ }
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case '@':
+ if (i + 3 < input.size()) {
+ if (is_plain_ident_start(input[i + 1]) &&
+ is_plain_ident(input[i + 2]) && is_plain_ident(input[i + 3])) {
+ offset = i + 1;
+ auto ident_token = consume_ident();
+
+ if (ident_token.type == css_parser_token::token_type::ident_token) {
+ /* Update type */
+ ident_token.type = css_parser_token::token_type::at_keyword_token;
+ }
+
+ return ident_token;
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ case '#':
+ /* TODO: make it more conformant */
+ if (i + 2 < input.size()) {
+ auto next_c = input[i + 1], next_next_c = input[i + 2];
+ if ((is_plain_ident(next_c) || next_c == '-') &&
+ (is_plain_ident(next_next_c) || next_next_c == '-')) {
+ offset = i + 1;
+ /* We consume indent, but we allow numbers there */
+ auto ident_token = consume_ident(true);
+
+ if (ident_token.type == css_parser_token::token_type::ident_token) {
+ /* Update type */
+ ident_token.type = css_parser_token::token_type::hash_token;
+ }
+
+ return ident_token;
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ default:
+ /* Generic parsing code */
+
+ if (g_ascii_isdigit(c)) {
+ return consume_number();
+ }
+ else if (is_plain_ident_start(c)) {
+ return consume_ident();
+ }
+ else {
+ offset = i + 1;
+ return make_token<css_parser_token::token_type::delim_token>(c);
+ }
+ break;
+ }
+ }
+
+ return make_token<css_parser_token::token_type::eof_token>();
+}
+
+constexpr auto
+css_parser_token::get_token_type() -> const char *
+{
+ const char *ret = "unknown";
+
+ switch (type) {
+ case token_type::whitespace_token:
+ ret = "whitespace";
+ break;
+ case token_type::ident_token:
+ ret = "ident";
+ break;
+ case token_type::function_token:
+ ret = "function";
+ break;
+ case token_type::at_keyword_token:
+ ret = "atkeyword";
+ break;
+ case token_type::hash_token:
+ ret = "hash";
+ break;
+ case token_type::string_token:
+ ret = "string";
+ break;
+ case token_type::number_token:
+ ret = "number";
+ break;
+ case token_type::url_token:
+ ret = "url";
+ break;
+ case token_type::cdo_token: /* xml open comment */
+ ret = "cdo";
+ break;
+ case token_type::cdc_token: /* xml close comment */
+ ret = "cdc";
+ break;
+ case token_type::delim_token:
+ ret = "delim";
+ break;
+ case token_type::obrace_token: /* ( */
+ ret = "obrace";
+ break;
+ case token_type::ebrace_token: /* ) */
+ ret = "ebrace";
+ break;
+ case token_type::osqbrace_token: /* [ */
+ ret = "osqbrace";
+ break;
+ case token_type::esqbrace_token: /* ] */
+ ret = "esqbrace";
+ break;
+ case token_type::ocurlbrace_token: /* { */
+ ret = "ocurlbrace";
+ break;
+ case token_type::ecurlbrace_token: /* } */
+ ret = "ecurlbrace";
+ break;
+ case token_type::comma_token:
+ ret = "comma";
+ break;
+ case token_type::colon_token:
+ ret = "colon";
+ break;
+ case token_type::semicolon_token:
+ ret = "semicolon";
+ break;
+ case token_type::eof_token:
+ ret = "eof";
+ break;
+ }
+
+ return ret;
+}
+
+
+auto css_parser_token::debug_token_str() -> std::string
+{
+ const auto *token_type_str = get_token_type();
+ std::string ret = token_type_str;
+
+ std::visit([&](auto arg) -> auto {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::string_view> || std::is_same_v<T, char>) {
+ ret += "; value=";
+ ret += arg;
+ }
+ else if constexpr (std::is_same_v<T, double>) {
+ ret += "; value=";
+ ret += std::to_string(arg);
+ }
+ },
+ value);
+
+ if ((flags & (~number_dimension)) != default_flags) {
+ ret += "; flags=" + std::to_string(flags);
+ }
+
+ if (flags & number_dimension) {
+ ret += "; dim=" + std::to_string(static_cast<int>(dimension_type));
+ }
+
+ return ret; /* Copy elision */
+}
+
+}// namespace rspamd::css \ No newline at end of file
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
new file mode 100644
index 0000000..aa6a1a7
--- /dev/null
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -0,0 +1,215 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_TOKENISER_HXX
+#define RSPAMD_CSS_TOKENISER_HXX
+
+#include <string_view>
+#include <utility>
+#include <variant>
+#include <list>
+#include <functional>
+#include <cstdint>
+#include "mem_pool.h"
+
+namespace rspamd::css {
+
+struct css_parser_token_placeholder {}; /* For empty tokens */
+
+struct css_parser_token {
+
+ enum class token_type : std::uint8_t {
+ whitespace_token,
+ ident_token,
+ function_token,
+ at_keyword_token,
+ hash_token,
+ string_token,
+ number_token,
+ url_token,
+ cdo_token, /* xml open comment */
+ cdc_token, /* xml close comment */
+ delim_token,
+ obrace_token, /* ( */
+ ebrace_token, /* ) */
+ osqbrace_token, /* [ */
+ esqbrace_token, /* ] */
+ ocurlbrace_token, /* { */
+ ecurlbrace_token, /* } */
+ comma_token,
+ colon_token,
+ semicolon_token,
+ eof_token,
+ };
+
+ enum class dim_type : std::uint8_t {
+ dim_px = 0,
+ dim_em,
+ dim_rem,
+ dim_ex,
+ dim_wv,
+ dim_wh,
+ dim_vmax,
+ dim_vmin,
+ dim_pt,
+ dim_cm,
+ dim_mm,
+ dim_in,
+ dim_pc,
+ dim_max,
+ };
+
+ static const std::uint8_t default_flags = 0;
+ static const std::uint8_t flag_bad_string = (1u << 0u);
+ static const std::uint8_t number_dimension = (1u << 1u);
+ static const std::uint8_t number_percent = (1u << 2u);
+ static const std::uint8_t flag_bad_dimension = (1u << 3u);
+
+ using value_type = std::variant<std::string_view, /* For strings and string like tokens */
+ char, /* For delimiters (might need to move to unicode point) */
+ float, /* For numeric stuff */
+ css_parser_token_placeholder /* For general no token stuff */
+ >;
+
+ /* Typed storage */
+ value_type value;
+
+ int lineno;
+
+ token_type type;
+ std::uint8_t flags = default_flags;
+ dim_type dimension_type;
+
+ css_parser_token() = delete;
+ explicit css_parser_token(token_type type, const value_type &value)
+ : value(value), type(type)
+ {
+ }
+ css_parser_token(css_parser_token &&other) = default;
+ css_parser_token(const css_parser_token &token) = default;
+ auto operator=(css_parser_token &&other) -> css_parser_token & = default;
+ auto adjust_dim(const css_parser_token &dim_token) -> bool;
+
+ auto get_string_or_default(const std::string_view &def) const -> std::string_view
+ {
+ if (std::holds_alternative<std::string_view>(value)) {
+ return std::get<std::string_view>(value);
+ }
+ else if (std::holds_alternative<char>(value)) {
+ return std::string_view(&std::get<char>(value), 1);
+ }
+
+ return def;
+ }
+
+ auto get_delim() const -> char
+ {
+ if (std::holds_alternative<char>(value)) {
+ return std::get<char>(value);
+ }
+
+ return (char) -1;
+ }
+
+ auto get_number_or_default(float def) const -> float
+ {
+ if (std::holds_alternative<float>(value)) {
+ auto dbl = std::get<float>(value);
+
+ if (flags & css_parser_token::number_percent) {
+ dbl /= 100.0;
+ }
+
+ return dbl;
+ }
+
+ return def;
+ }
+
+ auto get_normal_number_or_default(float def) const -> float
+ {
+ if (std::holds_alternative<float>(value)) {
+ auto dbl = std::get<float>(value);
+
+ if (flags & css_parser_token::number_percent) {
+ dbl /= 100.0;
+ }
+
+ if (dbl < 0) {
+ return 0.0;
+ }
+ else if (dbl > 1.0) {
+ return 1.0;
+ }
+
+ return dbl;
+ }
+
+ return def;
+ }
+
+ /* Debugging routines */
+ constexpr auto get_token_type() -> const char *;
+ /* This function might be slow */
+ auto debug_token_str() -> std::string;
+};
+
+static auto css_parser_eof_token(void) -> const css_parser_token &
+{
+ static css_parser_token eof_tok{
+ css_parser_token::token_type::eof_token,
+ css_parser_token_placeholder()};
+
+ return eof_tok;
+}
+
+/* Ensure that parser tokens are simple enough */
+/*
+ * compiler must implement P0602 "variant and optional should propagate copy/move triviality"
+ * This is broken on gcc < 8!
+ */
+static_assert(std::is_trivially_copyable_v<css_parser_token>);
+
+class css_tokeniser {
+public:
+ css_tokeniser() = delete;
+ css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv)
+ : input(sv), offset(0), pool(pool)
+ {
+ }
+
+ auto next_token(void) -> struct css_parser_token;
+ auto pushback_token(const struct css_parser_token &t) const -> void
+ {
+ backlog.push_back(t);
+ }
+
+private:
+ std::string_view input;
+ std::size_t offset;
+ rspamd_mempool_t *pool;
+ mutable std::list<css_parser_token> backlog;
+
+ auto consume_number() -> struct css_parser_token;
+ auto consume_ident(bool allow_number = false) -> struct css_parser_token;
+};
+
+}// namespace rspamd::css
+
+
+#endif//RSPAMD_CSS_TOKENISER_HXX
diff --git a/src/libserver/css/css_util.cxx b/src/libserver/css/css_util.cxx
new file mode 100644
index 0000000..07f8722
--- /dev/null
+++ b/src/libserver/css/css_util.cxx
@@ -0,0 +1,157 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_util.hxx"
+#include "css.hxx"
+#include <unicode/utf8.h>
+
+namespace rspamd::css {
+
+std::string_view unescape_css(rspamd_mempool_t *pool,
+ const std::string_view &sv)
+{
+ auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, sv.length()));
+ auto *d = nspace;
+ auto nleft = sv.length();
+
+ enum {
+ normal = 0,
+ quoted,
+ escape,
+ skip_spaces,
+ } state = normal;
+
+ char quote_char, prev_c = 0;
+ auto escape_offset = 0, i = 0;
+
+#define MAYBE_CONSUME_CHAR(c) \
+ do { \
+ if ((c) == '"' || (c) == '\'') { \
+ state = quoted; \
+ quote_char = (c); \
+ nleft--; \
+ *d++ = (c); \
+ } \
+ else if ((c) == '\\') { \
+ escape_offset = i; \
+ state = escape; \
+ } \
+ else { \
+ state = normal; \
+ nleft--; \
+ *d++ = g_ascii_tolower(c); \
+ } \
+ } while (0)
+
+ for (const auto c: sv) {
+ if (nleft == 0) {
+ msg_err_css("cannot unescape css: truncated buffer of size %d",
+ (int) sv.length());
+ break;
+ }
+ switch (state) {
+ case normal:
+ MAYBE_CONSUME_CHAR(c);
+ break;
+ case quoted:
+ if (c == quote_char) {
+ if (prev_c != '\\') {
+ state = normal;
+ }
+ }
+ prev_c = c;
+ nleft--;
+ *d++ = c;
+ break;
+ case escape:
+ if (!g_ascii_isxdigit(c)) {
+ if (i > escape_offset + 1) {
+ /* Try to decode an escape */
+ const auto *escape_start = &sv[escape_offset + 1];
+ unsigned long val;
+
+ if (!rspamd_xstrtoul(escape_start, i - escape_offset - 1, &val)) {
+ msg_debug_css("invalid broken escape found at pos %d",
+ escape_offset);
+ }
+ else {
+ if (val < 0x80) {
+ /* Trivial case: ascii character */
+ *d++ = (unsigned char) g_ascii_tolower(val);
+ nleft--;
+ }
+ else {
+ UChar32 uc = val;
+ auto off = 0;
+ UTF8_APPEND_CHAR_SAFE((uint8_t *) d, off,
+ sv.length(), u_tolower(uc));
+ d += off;
+ nleft -= off;
+ }
+ }
+ }
+ else {
+ /* Empty escape, ignore it */
+ msg_debug_css("invalid empty escape found at pos %d",
+ escape_offset);
+ }
+
+ if (nleft <= 0) {
+ msg_err_css("cannot unescape css: truncated buffer of size %d",
+ (int) sv.length());
+ }
+ else {
+ /* Escape is done, advance forward */
+ if (g_ascii_isspace(c)) {
+ state = skip_spaces;
+ }
+ else {
+ MAYBE_CONSUME_CHAR(c);
+ }
+ }
+ }
+ break;
+ case skip_spaces:
+ if (!g_ascii_isspace(c)) {
+ MAYBE_CONSUME_CHAR(c);
+ }
+ /* Ignore spaces */
+ break;
+ }
+
+ i++;
+ }
+
+ return std::string_view{nspace, sv.size() - nleft};
+}
+
+}// namespace rspamd::css
+
+/* C API */
+const gchar *rspamd_css_unescape(rspamd_mempool_t *pool,
+ const guchar *begin,
+ gsize len,
+ gsize *outlen)
+{
+ auto sv = rspamd::css::unescape_css(pool, {(const char *) begin, len});
+ const auto *v = sv.begin();
+
+ if (outlen) {
+ *outlen = sv.size();
+ }
+
+ return v;
+} \ No newline at end of file
diff --git a/src/libserver/css/css_util.hxx b/src/libserver/css/css_util.hxx
new file mode 100644
index 0000000..4837a46
--- /dev/null
+++ b/src/libserver/css/css_util.hxx
@@ -0,0 +1,37 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_UTIL_HXX
+#define RSPAMD_CSS_UTIL_HXX
+
+#include <string_view>
+#include "mem_pool.h"
+
+namespace rspamd::css {
+
+/*
+ * Unescape css escapes
+ * \20AC : must be followed by a space if the next character is one of a-f, A-F, 0-9
+ * \0020AC : must be 6 digits long, no space needed (but can be included)
+ */
+std::string_view unescape_css(rspamd_mempool_t *pool,
+ const std::string_view &sv);
+
+}// namespace rspamd::css
+
+#endif//RSPAMD_CSS_UTIL_HXX
diff --git a/src/libserver/css/css_value.cxx b/src/libserver/css/css_value.cxx
new file mode 100644
index 0000000..2546e01
--- /dev/null
+++ b/src/libserver/css/css_value.cxx
@@ -0,0 +1,449 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_value.hxx"
+#include "css_colors_list.hxx"
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
+#include "libutil/util.h"
+#include "contrib/ankerl/unordered_dense.h"
+#include "fmt/core.h"
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+/* Helper for unit test stringification */
+namespace doctest {
+template<>
+struct StringMaker<rspamd::css::css_color> {
+ static String convert(const rspamd::css::css_color &value)
+ {
+ return fmt::format("r={};g={};b={};alpha={}",
+ value.r, value.g, value.b, value.alpha)
+ .c_str();
+ }
+};
+
+}// namespace doctest
+
+namespace rspamd::css {
+
+auto css_value::maybe_color_from_string(const std::string_view &input)
+ -> std::optional<css_value>
+{
+
+ if (input.size() > 1 && input.front() == '#') {
+ return css_value::maybe_color_from_hex(input.substr(1));
+ }
+ else {
+ auto found_it = css_colors_map.find(input);
+
+ if (found_it != css_colors_map.end()) {
+ return css_value{found_it->second};
+ }
+ }
+
+ return std::nullopt;
+}
+
+constexpr static inline auto hexpair_decode(char c1, char c2) -> std::uint8_t
+{
+ std::uint8_t ret = 0;
+
+ if (c1 >= '0' && c1 <= '9') ret = c1 - '0';
+ else if (c1 >= 'A' && c1 <= 'F')
+ ret = c1 - 'A' + 10;
+ else if (c1 >= 'a' && c1 <= 'f')
+ ret = c1 - 'a' + 10;
+
+ ret *= 16;
+
+ if (c2 >= '0' && c2 <= '9') ret += c2 - '0';
+ else if (c2 >= 'A' && c2 <= 'F')
+ ret += c2 - 'A' + 10;
+ else if (c2 >= 'a' && c2 <= 'f')
+ ret += c2 - 'a' + 10;
+
+ return ret;
+}
+
+auto css_value::maybe_color_from_hex(const std::string_view &input)
+ -> std::optional<css_value>
+{
+ if (input.length() == 6) {
+ /* Plain RGB */
+ css_color col(hexpair_decode(input[0], input[1]),
+ hexpair_decode(input[2], input[3]),
+ hexpair_decode(input[4], input[5]));
+ return css_value(col);
+ }
+ else if (input.length() == 3) {
+ /* Rgb as 3 hex digests */
+ css_color col(hexpair_decode(input[0], input[0]),
+ hexpair_decode(input[1], input[1]),
+ hexpair_decode(input[2], input[2]));
+ return css_value(col);
+ }
+ else if (input.length() == 8) {
+ /* RGBA */
+ css_color col(hexpair_decode(input[0], input[1]),
+ hexpair_decode(input[2], input[3]),
+ hexpair_decode(input[4], input[5]),
+ hexpair_decode(input[6], input[7]));
+ return css_value(col);
+ }
+
+ return std::nullopt;
+}
+
+constexpr static inline auto rgb_color_component_convert(const css_parser_token &tok)
+ -> std::uint8_t
+{
+ std::uint8_t ret = 0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ auto dbl = std::get<float>(tok.value);
+
+ if (tok.flags & css_parser_token::number_percent) {
+ if (dbl > 100) {
+ dbl = 100;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+ ret = (std::uint8_t)(dbl / 100.0 * 255.0);
+ }
+ else {
+ if (dbl > 255) {
+ dbl = 255;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+
+ ret = (std::uint8_t)(dbl);
+ }
+ }
+
+ return ret;
+}
+
+constexpr static inline auto alpha_component_convert(const css_parser_token &tok)
+ -> std::uint8_t
+{
+ double ret = 1.0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ auto dbl = std::get<float>(tok.value);
+
+ if (tok.flags & css_parser_token::number_percent) {
+ if (dbl > 100) {
+ dbl = 100;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+ ret = (dbl / 100.0);
+ }
+ else {
+ if (dbl > 1.0) {
+ dbl = 1.0;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+
+ ret = dbl;
+ }
+ }
+
+ return (std::uint8_t)(ret * 255.0);
+}
+
+constexpr static inline auto h_component_convert(const css_parser_token &tok)
+ -> double
+{
+ double ret = 0.0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ auto dbl = std::get<float>(tok.value);
+
+ if (tok.flags & css_parser_token::number_percent) {
+ if (dbl > 100) {
+ dbl = 100;
+ }
+ else if (dbl < 0) {
+ dbl = 0;
+ }
+ ret = (dbl / 100.0);
+ }
+ else {
+ dbl = ((((int) dbl % 360) + 360) % 360); /* Deal with rotations */
+ ret = dbl / 360.0; /* Normalize to 0..1 */
+ }
+ }
+
+ return ret;
+}
+
+constexpr static inline auto sl_component_convert(const css_parser_token &tok)
+ -> double
+{
+ double ret = 0.0;
+
+ if (tok.type == css_parser_token::token_type::number_token) {
+ ret = tok.get_normal_number_or_default(ret);
+ }
+
+ return ret;
+}
+
+static inline auto hsl_to_rgb(double h, double s, double l)
+ -> css_color
+{
+ css_color ret;
+
+ constexpr auto hue2rgb = [](auto p, auto q, auto t) -> auto {
+ if (t < 0.0) {
+ t += 1.0;
+ }
+ if (t > 1.0) {
+ t -= 1.0;
+ }
+ if (t * 6. < 1.0) {
+ return p + (q - p) * 6.0 * t;
+ }
+ if (t * 2. < 1) {
+ return q;
+ }
+ if (t * 3. < 2.) {
+ return p + (q - p) * (2.0 / 3.0 - t) * 6.0;
+ }
+ return p;
+ };
+
+ if (s == 0) {
+ /* Achromatic */
+ ret.r = l;
+ ret.g = l;
+ ret.b = l;
+ }
+ else {
+ auto q = l <= 0.5 ? l * (1.0 + s) : l + s - l * s;
+ auto p = 2.0 * l - q;
+ ret.r = (std::uint8_t)(hue2rgb(p, q, h + 1.0 / 3.0) * 255);
+ ret.g = (std::uint8_t)(hue2rgb(p, q, h) * 255);
+ ret.b = (std::uint8_t)(hue2rgb(p, q, h - 1.0 / 3.0) * 255);
+ }
+
+ ret.alpha = 255;
+
+ return ret;
+}
+
+auto css_value::maybe_color_from_function(const css_consumed_block::css_function_block &func)
+ -> std::optional<css_value>
+{
+
+ if (func.as_string() == "rgb" && func.args.size() == 3) {
+ css_color col{rgb_color_component_convert(func.args[0]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[1]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[2]->get_token_or_empty())};
+
+ return css_value(col);
+ }
+ else if (func.as_string() == "rgba" && func.args.size() == 4) {
+ css_color col{rgb_color_component_convert(func.args[0]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[1]->get_token_or_empty()),
+ rgb_color_component_convert(func.args[2]->get_token_or_empty()),
+ alpha_component_convert(func.args[3]->get_token_or_empty())};
+
+ return css_value(col);
+ }
+ else if (func.as_string() == "hsl" && func.args.size() == 3) {
+ auto h = h_component_convert(func.args[0]->get_token_or_empty());
+ auto s = sl_component_convert(func.args[1]->get_token_or_empty());
+ auto l = sl_component_convert(func.args[2]->get_token_or_empty());
+
+ auto col = hsl_to_rgb(h, s, l);
+
+ return css_value(col);
+ }
+ else if (func.as_string() == "hsla" && func.args.size() == 4) {
+ auto h = h_component_convert(func.args[0]->get_token_or_empty());
+ auto s = sl_component_convert(func.args[1]->get_token_or_empty());
+ auto l = sl_component_convert(func.args[2]->get_token_or_empty());
+
+ auto col = hsl_to_rgb(h, s, l);
+ col.alpha = alpha_component_convert(func.args[3]->get_token_or_empty());
+
+ return css_value(col);
+ }
+
+ return std::nullopt;
+}
+
+auto css_value::maybe_dimension_from_number(const css_parser_token &tok)
+ -> std::optional<css_value>
+{
+ if (std::holds_alternative<float>(tok.value)) {
+ auto dbl = std::get<float>(tok.value);
+ css_dimension dim;
+
+ dim.dim = dbl;
+
+ if (tok.flags & css_parser_token::number_percent) {
+ dim.is_percent = true;
+ }
+ else {
+ dim.is_percent = false;
+ }
+
+ return css_value{dim};
+ }
+
+ return std::nullopt;
+}
+
+constexpr const auto display_names_map = frozen::make_unordered_map<frozen::string, css_display_value>({
+ {"hidden", css_display_value::DISPLAY_HIDDEN},
+ {"none", css_display_value::DISPLAY_HIDDEN},
+ {"inline", css_display_value::DISPLAY_INLINE},
+ {"block", css_display_value::DISPLAY_BLOCK},
+ {"content", css_display_value::DISPLAY_INLINE},
+ {"flex", css_display_value::DISPLAY_BLOCK},
+ {"grid", css_display_value::DISPLAY_BLOCK},
+ {"inline-block", css_display_value::DISPLAY_INLINE},
+ {"inline-flex", css_display_value::DISPLAY_INLINE},
+ {"inline-grid", css_display_value::DISPLAY_INLINE},
+ {"inline-table", css_display_value::DISPLAY_INLINE},
+ {"list-item", css_display_value::DISPLAY_BLOCK},
+ {"run-in", css_display_value::DISPLAY_INLINE},
+ {"table", css_display_value::DISPLAY_BLOCK},
+ {"table-caption", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-column-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-header-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-footer-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-row-group", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-cell", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-column", css_display_value::DISPLAY_TABLE_ROW},
+ {"table-row", css_display_value::DISPLAY_TABLE_ROW},
+ {"initial", css_display_value::DISPLAY_INLINE},
+});
+
+auto css_value::maybe_display_from_string(const std::string_view &input)
+ -> std::optional<css_value>
+{
+ auto f = display_names_map.find(input);
+
+ if (f != display_names_map.end()) {
+ return css_value{f->second};
+ }
+
+ return std::nullopt;
+}
+
+
+auto css_value::debug_str() const -> std::string
+{
+ std::string ret;
+
+ std::visit([&](const auto &arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, css_color>) {
+ ret += fmt::format("color: r={};g={};b={};alpha={}",
+ arg.r, arg.g, arg.b, arg.alpha);
+ }
+ else if constexpr (std::is_same_v<T, double>) {
+ ret += "size: " + std::to_string(arg);
+ }
+ else if constexpr (std::is_same_v<T, css_dimension>) {
+ ret += "dimension: " + std::to_string(arg.dim);
+ if (arg.is_percent) {
+ ret += "%";
+ }
+ }
+ else if constexpr (std::is_same_v<T, css_display_value>) {
+ ret += "display: ";
+ switch (arg) {
+ case css_display_value::DISPLAY_HIDDEN:
+ ret += "hidden";
+ break;
+ case css_display_value::DISPLAY_BLOCK:
+ ret += "block";
+ break;
+ case css_display_value::DISPLAY_INLINE:
+ ret += "inline";
+ break;
+ case css_display_value::DISPLAY_TABLE_ROW:
+ ret += "table_row";
+ break;
+ }
+ }
+ else if constexpr (std::is_integral_v<T>) {
+ ret += "integral: " + std::to_string(static_cast<int>(arg));
+ }
+ else {
+ ret += "nyi";
+ }
+ },
+ value);
+
+ return ret;
+}
+
+TEST_SUITE("css"){
+ TEST_CASE("css hex colors"){
+ const std::pair<const char *, css_color> hex_tests[] = {
+ {"000", css_color(0, 0, 0)},
+ {"000000", css_color(0, 0, 0)},
+ {"f00", css_color(255, 0, 0)},
+ {"FEDCBA", css_color(254, 220, 186)},
+ {"234", css_color(34, 51, 68)},
+ };
+
+for (const auto &p: hex_tests) {
+ SUBCASE((std::string("parse hex color: ") + p.first).c_str())
+ {
+ auto col_parsed = css_value::maybe_color_from_hex(p.first);
+ //CHECK_UNARY(col_parsed);
+ //CHECK_UNARY(col_parsed.value().to_color());
+ auto final_col = col_parsed.value().to_color().value();
+ CHECK(final_col == p.second);
+ }
+}
+}// namespace rspamd::css
+TEST_CASE("css colors strings")
+{
+ auto passed = 0;
+ for (const auto &p: css_colors_map) {
+ /* Match some of the colors selected randomly */
+ if (rspamd_random_double_fast() > 0.9) {
+ auto col_parsed = css_value::maybe_color_from_string(p.first);
+ auto final_col = col_parsed.value().to_color().value();
+ CHECK_MESSAGE(final_col == p.second, p.first.data());
+ passed++;
+
+ if (passed > 20) {
+ break;
+ }
+ }
+ }
+}
+}
+;
+}
diff --git a/src/libserver/css/css_value.hxx b/src/libserver/css/css_value.hxx
new file mode 100644
index 0000000..1d57421
--- /dev/null
+++ b/src/libserver/css/css_value.hxx
@@ -0,0 +1,174 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_CSS_VALUE_HXX
+#define RSPAMD_CSS_VALUE_HXX
+
+#include <string>
+#include <variant>
+#include <optional>
+#include <vector>
+#include <iosfwd>
+#include "parse_error.hxx"
+#include "css_parser.hxx"
+#include "contrib/expected/expected.hpp"
+
+namespace rspamd::css {
+
+struct alignas(int) css_color {
+ std::uint8_t r;
+ std::uint8_t g;
+ std::uint8_t b;
+
+ std::uint8_t alpha;
+
+ css_color(std::uint8_t _r, std::uint8_t _g, std::uint8_t _b, std::uint8_t _alpha = 255)
+ : r(_r), g(_g), b(_b), alpha(_alpha)
+ {
+ }
+ css_color() = default;
+ constexpr auto to_number() const -> std::uint32_t
+ {
+ return (std::uint32_t) alpha << 24 |
+ (std::uint32_t) r << 16 |
+ (std::uint32_t) g << 8 |
+ (std::uint32_t) b << 0;
+ }
+
+ constexpr auto to_rgb() const -> std::uint32_t
+ {
+ return (std::uint32_t) r << 16 |
+ (std::uint32_t) g << 8 |
+ (std::uint32_t) b << 0;
+ }
+ friend bool operator==(const css_color &l, const css_color &r)
+ {
+ return (memcmp(&l, &r, sizeof(css_color)) == 0);
+ }
+
+ static auto white() -> css_color
+ {
+ return css_color{255, 255, 255};
+ }
+ static auto black() -> css_color
+ {
+ return css_color{0, 0, 0};
+ }
+};
+
+struct css_dimension {
+ float dim;
+ bool is_percent;
+};
+
+/*
+ * Simple enum class for display stuff
+ */
+enum class css_display_value : std::uint8_t {
+ DISPLAY_INLINE,
+ DISPLAY_BLOCK,
+ DISPLAY_TABLE_ROW,
+ DISPLAY_HIDDEN
+};
+
+/*
+ * Value handler, uses std::variant instead of polymorphic classes for now
+ * for simplicity
+ */
+struct css_value {
+ std::variant<css_color,
+ float,
+ css_display_value,
+ css_dimension,
+ std::monostate>
+ value;
+
+ css_value()
+ {
+ }
+ css_value(const css_color &color)
+ : value(color)
+ {
+ }
+ css_value(float num)
+ : value(num)
+ {
+ }
+ css_value(css_dimension dim)
+ : value(dim)
+ {
+ }
+ css_value(css_display_value d)
+ : value(d)
+ {
+ }
+
+ auto to_color(void) const -> std::optional<css_color>
+ {
+ return extract_value_maybe<css_color>();
+ }
+
+ auto to_number(void) const -> std::optional<float>
+ {
+ return extract_value_maybe<float>();
+ }
+
+ auto to_dimension(void) const -> std::optional<css_dimension>
+ {
+ return extract_value_maybe<css_dimension>();
+ }
+
+ auto to_display(void) const -> std::optional<css_display_value>
+ {
+ return extract_value_maybe<css_display_value>();
+ }
+
+ auto is_valid(void) const -> bool
+ {
+ return !(std::holds_alternative<std::monostate>(value));
+ }
+
+ auto debug_str() const -> std::string;
+
+ static auto maybe_color_from_string(const std::string_view &input)
+ -> std::optional<css_value>;
+ static auto maybe_color_from_hex(const std::string_view &input)
+ -> std::optional<css_value>;
+ static auto maybe_color_from_function(const css_consumed_block::css_function_block &func)
+ -> std::optional<css_value>;
+ static auto maybe_dimension_from_number(const css_parser_token &tok)
+ -> std::optional<css_value>;
+ static auto maybe_display_from_string(const std::string_view &input)
+ -> std::optional<css_value>;
+
+private:
+ template<typename T>
+ auto extract_value_maybe(void) const -> std::optional<T>
+ {
+ if (std::holds_alternative<T>(value)) {
+ return std::get<T>(value);
+ }
+
+ return std::nullopt;
+ }
+};
+
+}// namespace rspamd::css
+
+
+#endif//RSPAMD_CSS_VALUE_HXX
diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx
new file mode 100644
index 0000000..22b76f0
--- /dev/null
+++ b/src/libserver/css/parse_error.hxx
@@ -0,0 +1,61 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifndef RSPAMD_PARSE_ERROR_HXX
+#define RSPAMD_PARSE_ERROR_HXX
+
+#include <string>
+#include <optional>
+
+namespace rspamd::css {
+
+/*
+ * Generic parser errors
+ */
+enum class css_parse_error_type {
+ PARSE_ERROR_UNKNOWN_OPTION,
+ PARSE_ERROR_INVALID_SYNTAX,
+ PARSE_ERROR_BAD_NESTING,
+ PARSE_ERROR_NYI,
+ PARSE_ERROR_UNKNOWN_ERROR,
+ /* All above is treated as fatal error in parsing */
+ PARSE_ERROR_NO_ERROR,
+ PARSE_ERROR_EMPTY,
+};
+
+struct css_parse_error {
+ css_parse_error_type type = css_parse_error_type::PARSE_ERROR_UNKNOWN_ERROR;
+ std::optional<std::string> description;
+
+ explicit css_parse_error(css_parse_error_type type, const std::string &description)
+ : type(type), description(description)
+ {
+ }
+ explicit css_parse_error(css_parse_error_type type = css_parse_error_type::PARSE_ERROR_NO_ERROR)
+ : type(type)
+ {
+ }
+
+ constexpr auto is_fatal(void) const -> bool
+ {
+ return type < css_parse_error_type::PARSE_ERROR_NO_ERROR;
+ }
+};
+
+}// namespace rspamd::css
+#endif//RSPAMD_PARSE_ERROR_HXX