blob: a833ada92e65debde361ff5763c04a2af5243b60 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "utf8.hpp"
#include <ixion/exceptions.hpp>
#include <sstream>
#include <limits>
namespace ixion { namespace detail {
namespace {

/** Sentinel returned when a byte cannot be the first byte of a UTF-8 sequence. */
constexpr uint8_t invalid_utf8_byte_length = std::numeric_limits<uint8_t>::max();

/**
 * Determine the total length in bytes of a UTF-8 encoded character from its
 * first (lead) byte.
 *
 * Only the high-bit pattern of the lead byte is inspected; the function does
 * not check for overlong encodings or for code points beyond U+10FFFF.
 *
 * @param c1 first byte of the encoded character.
 *
 * @return 1, 2, 3 or 4 for a recognized lead-byte pattern, or
 *         invalid_utf8_byte_length when the byte matches none of them (e.g.
 *         a continuation byte 10xxxxxx).
 */
uint8_t calc_utf8_byte_length(uint8_t c1)
{
    if ((c1 & 0x80) == 0x00)
        // highest bit is not set.
        return 1;

    if ((c1 & 0xE0) == 0xC0)
        // highest 3 bits are 110.
        return 2;

    if ((c1 & 0xF0) == 0xE0)
        // highest 4 bits are 1110.
        return 3;

    // The mask must cover exactly the top 5 bits (0xF8).  A 6-bit mask (0xFC)
    // would wrongly reject lead bytes 0xF4-0xF7, and 0xF4 starts every code
    // point in the range U+100000..U+10FFFF.
    if ((c1 & 0xF8) == 0xF0)
        // highest 5 bits are 11110.
        return 4;

    return invalid_utf8_byte_length;
}

}
/**
 * Compute the byte offset at which each UTF-8 encoded character of a string
 * begins.
 *
 * @param s string to scan; it is interpreted as UTF-8.
 *
 * @return byte offsets of the first byte of every character, in ascending
 *         order.  An empty string yields an empty vector.
 *
 * @throws general_error when a character's lead byte is not a recognized
 *         UTF-8 lead byte, or when it declares more bytes than remain in the
 *         string (truncated trailing sequence).
 */
std::vector<std::size_t> calc_utf8_byte_positions(const std::string& s)
{
    const std::size_t n_bytes = s.size();
    std::vector<std::size_t> positions;

    std::size_t pos = 0;
    while (pos < n_bytes)
    {
        positions.push_back(pos);

        // Plain char may be signed; convert explicitly so that bytes >= 0x80
        // are handed over as their unsigned value.
        const uint8_t n = calc_utf8_byte_length(static_cast<uint8_t>(s[pos]));

        // Besides an unrecognized lead byte, also reject a sequence that
        // would run past the end of the string.  Advancing a raw pointer
        // beyond one-past-the-end of the buffer is undefined behaviour, so
        // the remaining length is checked before stepping forward.
        if (n == invalid_utf8_byte_length || n > n_bytes - pos)
        {
            std::ostringstream os;
            os << "invalid utf8 byte length in string '" << s << "'";
            throw general_error(os.str());
        }

        pos += n;
    }

    return positions;
}
}}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|