summary refs log tree commit diff stats
path: root/src/libserver/css/css_util.cxx
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-10 21:30:40 +0000
commit 133a45c109da5310add55824db21af5239951f93 (patch)
tree ba6ac4c0a950a0dda56451944315d66409923918 /src/libserver/css/css_util.cxx
parent Initial commit. (diff)
download rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz
rspamd-133a45c109da5310add55824db21af5239951f93.zip
Adding upstream version 3.8.1. [upstream/3.8.1] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libserver/css/css_util.cxx')
-rw-r--r-- src/libserver/css/css_util.cxx 157
1 files changed, 157 insertions, 0 deletions
diff --git a/src/libserver/css/css_util.cxx b/src/libserver/css/css_util.cxx
new file mode 100644
index 0000000..07f8722
--- /dev/null
+++ b/src/libserver/css/css_util.cxx
@@ -0,0 +1,157 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "css_util.hxx"
+#include "css.hxx"
+#include <unicode/utf8.h>
+
+namespace rspamd::css {
+
+/**
+ * Unescapes a CSS fragment into a buffer allocated from `pool`.
+ *
+ * Rules implemented below:
+ *  - outside of quotes, characters are lowercased with g_ascii_tolower;
+ *  - quoted strings ('...' or "...") are copied as is, quotes included;
+ *  - a backslash followed by hex digits is decoded as a code point, which is
+ *    lowercased and stored as UTF-8 (this also applies to an escape that
+ *    terminates the input); whitespace right after an escape is skipped;
+ *  - a backslash that is not followed by hex digits is dropped.
+ *
+ * @param pool memory pool used to allocate the output buffer
+ * @param sv input text
+ * @return view over the bytes written to the pool-allocated buffer; no
+ * terminating NUL is appended
+ */
+std::string_view unescape_css(rspamd_mempool_t *pool,
+                              const std::string_view &sv)
+{
+    auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, sv.length()));
+    auto *d = nspace;
+    /* Number of bytes that can still be written to the output buffer */
+    auto nleft = sv.length();
+
+    enum {
+        normal = 0,
+        quoted,
+        escape,
+        skip_spaces,
+    } state = normal;
+
+    char quote_char = 0, prev_c = 0;
+    /* escape_offset: index of the last seen backslash, i: index of the current char */
+    int escape_offset = 0, i = 0;
+
+    /* Processes a character that is neither inside quotes nor a part of a hex escape */
+    auto consume_char = [&](char ch) {
+        if (ch == '"' || ch == '\'') {
+            state = quoted;
+            quote_char = ch;
+            nleft--;
+            *d++ = ch;
+        }
+        else if (ch == '\\') {
+            escape_offset = i;
+            state = escape;
+        }
+        else {
+            state = normal;
+            nleft--;
+            *d++ = g_ascii_tolower(ch);
+        }
+    };
+
+    /*
+     * Decodes hex digits in sv[escape_offset + 1, i) and appends the result.
+     * Requires i > escape_offset + 1 and nleft > 0.
+     * Returns false if the digits cannot be converted to a number; logging is
+     * left to the caller.
+     */
+    auto decode_escape = [&]() -> bool {
+        const auto *escape_start = &sv[escape_offset + 1];
+        unsigned long val;
+
+        if (!rspamd_xstrtoul(escape_start, i - escape_offset - 1, &val)) {
+            return false;
+        }
+
+        if (val < 0x80) {
+            /* Trivial case: ascii character */
+            *d++ = (unsigned char) g_ascii_tolower(val);
+            nleft--;
+        }
+        else {
+            UChar32 uc = val;
+            int off = 0;
+            /* Capacity is counted from `d`, so it is the remaining space, not the full size */
+            UTF8_APPEND_CHAR_SAFE((uint8_t *) d, off,
+                                  nleft, u_tolower(uc));
+            d += off;
+            nleft -= off;
+        }
+
+        return true;
+    };
+
+    for (const auto c: sv) {
+        if (nleft == 0) {
+            msg_err_css("cannot unescape css: truncated buffer of size %d",
+                        (int) sv.length());
+            break;
+        }
+        switch (state) {
+        case normal:
+            consume_char(c);
+            break;
+        case quoted:
+            if (c == quote_char) {
+                /* A quote preceded by a backslash does not terminate the string */
+                if (prev_c != '\\') {
+                    state = normal;
+                }
+            }
+            prev_c = c;
+            nleft--;
+            *d++ = c;
+            break;
+        case escape:
+            if (!g_ascii_isxdigit(c)) {
+                if (i > escape_offset + 1) {
+                    /* Try to decode an escape */
+                    if (!decode_escape()) {
+                        msg_debug_css("invalid broken escape found at pos %d",
+                                      escape_offset);
+                    }
+                }
+                else {
+                    /* Empty escape, ignore it */
+                    msg_debug_css("invalid empty escape found at pos %d",
+                                  escape_offset);
+                }
+
+                if (nleft == 0) {
+                    msg_err_css("cannot unescape css: truncated buffer of size %d",
+                                (int) sv.length());
+                }
+                else {
+                    /* Escape is done, advance forward */
+                    if (g_ascii_isspace(c)) {
+                        state = skip_spaces;
+                    }
+                    else {
+                        consume_char(c);
+                    }
+                }
+            }
+            break;
+        case skip_spaces:
+            if (!g_ascii_isspace(c)) {
+                consume_char(c);
+            }
+            /* Ignore spaces */
+            break;
+        }
+
+        i++;
+    }
+
+    /* Input ended inside a hex escape: there is no terminator to trigger decoding */
+    if (state == escape && i > escape_offset + 1 && nleft > 0) {
+        if (!decode_escape()) {
+            msg_debug_css("invalid broken escape found at pos %d",
+                          escape_offset);
+        }
+    }
+
+    return std::string_view{nspace, sv.size() - nleft};
+}
+
+}// namespace rspamd::css
+
+/* C API */
+/**
+ * C wrapper around rspamd::css::unescape_css.
+ *
+ * @param pool memory pool passed through to unescape_css
+ * @param begin start of the input bytes
+ * @param len number of input bytes
+ * @param outlen if not NULL, receives the length of the result
+ * @return pointer to the first byte of the unescaped data
+ */
+const gchar *rspamd_css_unescape(rspamd_mempool_t *pool,
+                                 const guchar *begin,
+                                 gsize len,
+                                 gsize *outlen)
+{
+    const auto sv = rspamd::css::unescape_css(pool,
+                                              {reinterpret_cast<const char *>(begin), len});
+
+    if (outlen) {
+        *outlen = sv.size();
+    }
+
+    /* data() always yields a pointer; the iterator returned by begin() need not be one */
+    return sv.data();
+}
\ No newline at end of file