blob: 329d70cdbc99b8947d295eec03071961a03847c7 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
/*
* Copyright (C) 2005-2021 Team Kodi
* This file is part of Kodi - https://kodi.tv
*
* SPDX-License-Identifier: GPL-2.0-or-later
* See LICENSES/README.md for more information.
*/
#include "CSSUtils.h"
#include <cstdint>
#include <string>
namespace
{
// https://www.w3.org/TR/css-syntax-3/#hex-digit
// Returns true when c is an ASCII hexadecimal digit (0-9, A-F, a-f).
bool isHexDigit(char c)
{
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}

// https://www.w3.org/TR/css-syntax-3/#hex-digit
// Returns the numeric value (0-15) of a hexadecimal digit.
// Precondition: isHexDigit(c). The result is meaningless for any other input.
uint32_t convertHexDigit(char c)
{
  if (c >= '0' && c <= '9')
  {
    return c - '0';
  }
  else if (c >= 'A' && c <= 'F')
  {
    return 10 + c - 'A';
  }
  else
  {
    return 10 + c - 'a';
  }
}

// https://www.w3.org/TR/css-syntax-3/#whitespace
// Returns true for the characters that may terminate a hexadecimal escape.
// CR and FF are included because the input is not run through the CSS
// preprocessing step that would normalise them to LF.
bool isEscapeTerminator(char c)
{
  return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}

// https://infra.spec.whatwg.org/#surrogate
// Returns true when c lies in the UTF-16 surrogate range U+D800..U+DFFF.
bool isSurrogateCodePoint(uint32_t c)
{
  return c >= 0xD800 && c <= 0xDFFF;
}

// https://www.w3.org/TR/css-syntax-3/#maximum-allowed-code-point
// Returns true when c is beyond U+10FFFF, the last Unicode code point.
bool isGreaterThanMaximumAllowedCodePoint(uint32_t c)
{
  return c > 0x10FFFF;
}

// Encodes a code point as UTF-8.
// Precondition: codePoint is a Unicode scalar value (<= U+10FFFF, not a surrogate).
std::string encodeUtf8(uint32_t codePoint)
{
  std::string bytes;
  if (codePoint < 0x80)
  {
    // 1-byte UTF-8: 0xxxxxxx
    bytes += static_cast<char>(codePoint);
  }
  else if (codePoint < 0x800)
  {
    // 2-byte UTF-8: 110xxxxx 10xxxxxx
    bytes += static_cast<char>(0xC0 | (codePoint >> 6));
    bytes += static_cast<char>(0x80 | (codePoint & 0x3F));
  }
  else if (codePoint < 0x10000)
  {
    // 3-byte UTF-8: 1110xxxx 10xxxxxx 10xxxxxx
    bytes += static_cast<char>(0xE0 | (codePoint >> 12));
    bytes += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
    bytes += static_cast<char>(0x80 | (codePoint & 0x3F));
  }
  else
  {
    // 4-byte UTF-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    bytes += static_cast<char>(0xF0 | (codePoint >> 18));
    bytes += static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
    bytes += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
    bytes += static_cast<char>(0x80 | (codePoint & 0x3F));
  }
  return bytes;
}

// https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
//
// Decodes the CSS escape sequence that starts at the backslash str[pos].
//
// - "\" followed by 1 to 6 hex digits yields the UTF-8 encoding of that code
//   point; one whitespace character directly after the digits (CRLF counts as
//   one) belongs to the escape and is consumed as well. Zero, surrogates and
//   values above U+10FFFF yield U+FFFD (replacement character).
// - "\" followed by any other character yields that character unchanged.
// - "\" as the very last character of str yields an empty string.
//
// On return pos is the index of the LAST character that was consumed, so a
// caller that increments pos afterwards continues with the first character
// that is not part of the escape. pos is left untouched when nothing follows
// the backslash.
std::string escapeStringChunk(const std::string& str, size_t& pos)
{
  const size_t first = pos + 1;

  // Nothing after the backslash: there is no escape to decode.
  if (first >= str.size())
    return "";

  // Not a hexadecimal escape: the escaped character stands for itself.
  if (!isHexDigit(str[first]))
  {
    pos = first;
    return std::string(1, str[first]);
  }

  // Accumulate at most six hex digits; cursor ends on the first index that is
  // not part of the digit run (possibly str.size()).
  uint32_t codePoint = 0;
  size_t cursor = first;
  for (int numDigits = 0; numDigits < 6 && cursor < str.size() && isHexDigit(str[cursor]);
       ++numDigits)
  {
    codePoint = 16 * codePoint + convertHexDigit(str[cursor]);
    ++cursor;
  }

  if (cursor < str.size() && isEscapeTerminator(str[cursor]))
  {
    // Swallow the single whitespace terminator, treating CRLF as one newline.
    if (str[cursor] == '\r' && cursor + 1 < str.size() && str[cursor + 1] == '\n')
      ++cursor;
    pos = cursor;
  }
  else
  {
    // No terminator: stop on the last digit so the following character is
    // kept by the caller instead of being skipped.
    pos = cursor - 1;
  }

  if (codePoint == 0 || isSurrogateCodePoint(codePoint) ||
      isGreaterThanMaximumAllowedCodePoint(codePoint))
  {
    // U+FFFD spelled as raw UTF-8 bytes; a u8"" literal is char8_t-based from
    // C++20 on and can no longer be appended to a std::string.
    return "\xEF\xBF\xBD";
  }

  return encodeUtf8(codePoint);
}
} // unnamed namespace
// Replaces every backslash escape sequence in str with the text it stands for.
// All other characters are copied through unchanged. The string is rewritten
// in place.
void UTILS::CSS::Escape(std::string& str)
{
  std::string decoded;
  size_t idx = 0;
  while (idx < str.size())
  {
    const char current = str[idx];
    if (current != '\\')
    {
      decoded += current;
    }
    else
    {
      // escapeStringChunk moves idx as it consumes the sequence; the
      // increment below then steps on from wherever it left idx.
      decoded += escapeStringChunk(str, idx);
    }
    ++idx;
  }
  str = decoded;
}
|