// path: root/src/pcrepp/pcre2pp.hh
// blob: a40d26cb3cabf8f9c5ab6b5c7b0caa3dbbcc8d00
/**
 * Copyright (c) 2022, Timothy Stack
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 * * Neither the name of Timothy Stack nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef lnav_pcre2pp_hh
#define lnav_pcre2pp_hh

#define PCRE2_CODE_UNIT_WIDTH 8

#include <memory>
#include <string>
#include <vector>

#include <pcre2.h>

#include "base/auto_mem.hh"
#include "base/intern_string.hh"
#include "base/result.h"
#include "mapbox/variant.hpp"

namespace lnav {
namespace pcre2pp {

/**
 * Quote a NUL-terminated string.
 *
 * NOTE(review): only the declaration is visible in this header.  Given the
 * surrounding regex wrapper, this presumably escapes pattern metacharacters
 * so the result matches the input literally -- confirm against the
 * implementation.
 *
 * @param unquoted The NUL-terminated text to quote.
 * @return The quoted text.
 */
std::string quote(const char* unquoted);

/**
 * Convenience overload of quote() for std::string arguments.
 *
 * The string is handed over through c_str(), so only the text up to the
 * first NUL byte reaches the const char* overload.
 *
 * @param unquoted The text to quote.
 * @return Whatever quote(const char*) returns for the same text.
 */
inline std::string
quote(const std::string& unquoted)
{
    const char* raw_text = unquoted.c_str();

    return quote(raw_text);
}

// Forward declarations: the types below name one another (friend
// declarations, pointers and references) before their definitions appear.
class code;
struct capture_builder;
class matcher;

/**
 * The subject text of a match together with the search position.
 *
 * The offsets are relative to the start of i_string:
 * match_data::remaining() adds i_string.sf_begin to i_next_offset when it
 * rebuilds a fragment of the underlying string.
 */
struct input {
    /** The text being searched. */
    string_fragment i_string;
    /** Where the current search started; used by match_data::leading(). */
    int i_offset = 0;
    /**
     * Where the next search resumes.  match_data::remaining() treats -1 as
     * "nothing left".
     */
    int i_next_offset = 0;
};

/**
 * Holds a PCRE2 match block together with the input that was searched and
 * gives access to the captured substrings.
 *
 * Instances can only be built by the friends `code` and `matcher` or through
 * unitialized().  NOTE(review): the fields are presumably filled in by
 * matcher::matches(), which is not defined in this header -- confirm.
 */
class match_data {
public:
    /**
     * @return A default-constructed placeholder: no PCRE2 match block, no
     * pattern, and a capture count of zero.  The misspelled name is kept
     * because it is part of the public interface.
     */
    static match_data unitialized() { return match_data{}; }

    /**
     * @return The part of the input between the offset the search started
     * from and the first ovector entry (the start of the overall match in
     * PCRE2's ovector layout).
     */
    string_fragment leading() const
    {
        return this->md_input.i_string.sub_range(this->md_input.i_offset,
                                                 this->md_ovector[0]);
    }

    /**
     * @return The input from the next search offset up to the end of the
     * input fragment, or an invalid fragment when no captures are recorded
     * or the next offset is the -1 sentinel.
     */
    string_fragment remaining() const
    {
        if (this->md_capture_end == 0 || this->md_input.i_next_offset == -1) {
            return string_fragment::invalid();
        }

        // i_next_offset is relative to the input fragment, so rebase it onto
        // the underlying string before building the new fragment.
        return string_fragment::from_byte_range(
            this->md_input.i_string.sf_string,
            this->md_input.i_string.sf_begin + this->md_input.i_next_offset,
            this->md_input.i_string.sf_end);
    }

    /**
     * Look up a capture by number.
     *
     * @param index The capture number; entry 0 of the ovector is the overall
     * match.
     * @return The captured text, or nullopt when the index is not below the
     * recorded capture count or the group is unset.
     */
    nonstd::optional<string_fragment> operator[](size_t index) const
    {
        // md_capture_end is a signed int.  Test the sign first so that a
        // negative value can never be converted to a huge unsigned bound
        // that lets every index through.
        if (this->md_capture_end <= 0
            || index >= static_cast<size_t>(this->md_capture_end))
        {
            return nonstd::nullopt;
        }

        auto start = this->md_ovector[(index * 2)];
        auto stop = this->md_ovector[(index * 2) + 1];
        if (start == PCRE2_UNSET || stop == PCRE2_UNSET) {
            return nonstd::nullopt;
        }

        return this->md_input.i_string.sub_range(start, stop);
    }

    /**
     * Look up a capture by group name given as a character array (for
     * example, a string literal).
     */
    template<typename T, std::size_t N>
    nonstd::optional<string_fragment> operator[](const T (&name)[N]) const;

    /** @return The recorded capture count. */
    int get_count() const { return this->md_capture_end; }

    /** @return The number of ovector pairs in the PCRE2 match block. */
    uint32_t get_capacity() const { return this->md_ovector_count; }

private:
    friend matcher;
    friend code;

    match_data() = default;

    /**
     * Take ownership of a PCRE2 match block and cache its ovector pointer
     * and pair count.  md_data is declared before the cached fields, so it
     * is already initialized when they are read from it.
     */
    explicit match_data(auto_mem<pcre2_match_data> dat)
        : md_data(std::move(dat)),
          md_ovector(pcre2_get_ovector_pointer(this->md_data.in())),
          md_ovector_count(pcre2_get_ovector_count(this->md_data.in()))
    {
    }

    auto_mem<pcre2_match_data> md_data;
    /** The pattern used to resolve capture names; null until one is set. */
    const code* md_code{nullptr};
    input md_input;
    PCRE2_SIZE* md_ovector{nullptr};
    uint32_t md_ovector_count{0};
    int md_capture_end{0};
};

class matcher {
public:
    struct found {
        string_fragment f_all;
        string_fragment f_remaining;
    };
    struct not_found {};
    struct error {
        const code* e_code{nullptr};
        int e_error_code{0};
        std::string get_message();
    };

    class matches_result
        : public mapbox::util::variant<found, not_found, error> {
    public:
        using variant::variant;

        nonstd::optional<found> ignore_error()
        {
            return this->match(
                [](found fo) { return nonstd::make_optional(fo); },
                [](not_found) { return nonstd::nullopt; },
                [](error err) {
                    handle_error(err);
                    return nonstd::nullopt;
                });
        }

    private:
        static void handle_error(error err);
    };

    matcher& reload_input(string_fragment sf, int next_offset)
    {
        this->mb_input = input{sf, next_offset, next_offset};

        return *this;
    }

    matches_result matches(uint32_t options = 0);

    int get_next_offset() const { return this->mb_input.i_next_offset; }

private:
    friend capture_builder;

    matcher(const code& co, input& in, match_data& md)
        : mb_code(co), mb_input(in), mb_match_data(md)
    {
    }

    const code& mb_code;
    input mb_input;
    match_data& mb_match_data;
};

/**
 * Intermediate step between code::capture_from() and a matcher: carries the
 * pattern and the input and lets the caller pick the start position and the
 * destination for the results.
 */
struct capture_builder {
    const code& mb_code;
    input mb_input;

    /**
     * Start the search at the position where the given fragment begins.
     *
     * @param remaining A fragment of the same underlying string as the
     * input, typically match_data::remaining() from an earlier match.
     * @return A builder whose current and next offsets point at that
     * position.
     */
    capture_builder at(const string_fragment& remaining) &&
    {
        auto start = remaining.sf_begin;

        // The input offsets are relative to the start of i_string
        // (match_data::remaining() adds i_string.sf_begin back), whereas
        // sf_begin is a position in the underlying string, so rebase it.
        // A negative begin is passed through untouched so the -1 "nothing
        // left" sentinel survives; presumably that is what an invalid
        // fragment carries -- confirm against string_fragment.
        if (start >= 0) {
            start -= this->mb_input.i_string.sf_begin;
        }
        this->mb_input.i_offset = this->mb_input.i_next_offset = start;
        return *this;
    }

    /** Create a matcher that records its results in `md`. */
    matcher into(match_data& md) &&;

    /**
     * Run the pattern repeatedly over the input, calling `func` with the
     * match data for every match.
     */
    template<uint32_t Options = 0, typename F>
    Result<string_fragment, matcher::error> for_each(F func) &&;
};

/**
 * Describes a pattern that failed to compile.
 */
struct compile_error {
    /** The pattern text that was rejected. */
    std::string ce_pattern;
    /** The error code reported for the failure. */
    int ce_code = 0;
    /** The offset associated with the error. */
    size_t ce_offset = 0;

    /** @return A message describing the error. */
    std::string get_message() const;
};

/**
 * A compiled PCRE2 pattern together with its source text and a prototype
 * match_data sized for it.
 */
class code {
public:
    /**
     * A single named capture group.
     * NOTE(review): nc_entry presumably points at one entry of the PCRE2
     * name table -- confirm against the accessor definitions.
     */
    class named_capture {
    public:
        size_t get_index() const;
        string_fragment get_name() const;

        PCRE2_SPTR nc_entry;
    };

    /**
     * An iterable view over the pattern's named capture groups.  Only `code`
     * can create one (private constructor, friend).
     */
    class named_captures {
    public:
        /** Forward iterator; dereferencing yields a named_capture. */
        struct iterator {
            named_capture operator*() const;
            iterator& operator++();
            bool operator==(const iterator& other) const;
            bool operator!=(const iterator& other) const;

            uint32_t i_entry_size;
            PCRE2_SPTR i_entry;
        };

        iterator begin() const;
        iterator end() const;
        /** @return True when there are no named captures. */
        bool empty() const { return this->nc_count == 0; }
        /** @return The number of named captures. */
        size_t size() const { return this->nc_count; }

    private:
        friend code;

        named_captures() = default;

        uint32_t nc_count{0};
        uint32_t nc_entry_size{0};
        PCRE2_SPTR nc_name_table{nullptr};
    };

    /**
     * Compile a pattern.
     *
     * @param sf The pattern text.
     * @param options NOTE(review): presumably PCRE2 compile option bits --
     * confirm against the implementation.
     * @return The compiled code, or a compile_error.
     */
    static Result<code, compile_error> from(string_fragment sf,
                                            int options = 0);

    /**
     * Compile a pattern given as a character array (for example, a string
     * literal) and unwrap the result, so no compile_error is returned to
     * the caller.
     */
    template<typename T, std::size_t N>
    static code from_const(const T (&str)[N], int options = 0)
    {
        return from(string_fragment::from_const(str), options).unwrap();
    }

    /** @return The pattern text this code was built with. */
    const std::string& get_pattern() const { return this->p_pattern; }

    // The accessors below are defined outside this header.
    named_captures get_named_captures() const;

    const char* get_name_for_capture(size_t index) const;

    size_t get_capture_count() const;

    int name_index(const char* name) const;

    std::vector<string_fragment> get_captures() const;

    /** @return The ovector pair count of the prototype match data. */
    uint32_t get_match_data_capacity() const {
        return this->p_match_proto.md_ovector_count;
    }

    match_data create_match_data() const;

    /**
     * Begin building a match over `in`; the builder's offsets start at
     * their defaults of zero.
     */
    capture_builder capture_from(string_fragment in) const
    {
        return capture_builder{
            *this,
            input{in},
        };
    }

    /**
     * Run a single match attempt over `in`.
     *
     * The match data is a function-local thread_local, so it is shared by
     * every `code` that calls find_in() on the same thread.  It is
     * re-created whenever it has fewer ovector pairs than this pattern's
     * prototype.
     */
    matcher::matches_result find_in(string_fragment in,
                                    uint32_t options = 0) const
    {
        static thread_local match_data md = this->create_match_data();

        if (md.md_ovector_count < this->p_match_proto.md_ovector_count) {
            md = this->create_match_data();
        }

        return this->capture_from(in).into(md).matches(options);
    }

    size_t match_partial(string_fragment in) const;

    std::string replace(string_fragment str, const char* repl) const;

    /**
     * Move the compiled pattern and its text into a new shared `code`.
     * Callable on rvalues only, since this object is left moved-from.
     */
    std::shared_ptr<code> to_shared() &&
    {
        return std::make_shared<code>(std::move(this->p_code),
                                      std::move(this->p_pattern));
    }

    /**
     * Take ownership of a compiled pattern and its source text.
     *
     * p_match_proto is initialized by calling create_match_data() on this
     * object; p_code and p_pattern are declared before it, so they are
     * already initialized at that point.
     */
    code(auto_mem<pcre2_code> code, std::string pattern)
        : p_code(std::move(code)), p_pattern(std::move(pattern)),
          p_match_proto(this->create_match_data())
    {
    }

private:
    friend matcher;
    friend match_data;

    auto_mem<pcre2_code> p_code;
    std::string p_pattern;
    match_data p_match_proto;
};

/**
 * Look up a capture by group name given as a character array (for example,
 * a string literal).
 *
 * @param name The NUL-terminated group name.
 * @return The captured text, or nullopt when this match data has no pattern
 * attached, the name cannot be resolved to a group number, or the group is
 * outside the recorded captures or unset.
 */
template<typename T, std::size_t N>
nonstd::optional<string_fragment>
match_data::operator[](const T (&name)[N]) const
{
    // A default-constructed match_data has no pattern to resolve names
    // against; report "no capture" instead of dereferencing a null pointer.
    if (this->md_code == nullptr) {
        return nonstd::nullopt;
    }

    const auto index = pcre2_substring_number_from_name(
        this->md_code->p_code.in(),
        reinterpret_cast<const unsigned char*>(name));

    // Negative values are PCRE2 error codes, not capture numbers.  Handle
    // them here rather than relying on the conversion to size_t.
    if (index < 0) {
        return nonstd::nullopt;
    }

    return this->operator[](static_cast<size_t>(index));
}

/**
 * Run the pattern repeatedly over the input, calling `func` for every match.
 *
 * @tparam Options The options passed to matcher::matches() on each attempt.
 * @param func Invoked with the match_data after each successful match.
 * @return Ok with the match data's remaining() fragment once an attempt
 * finds nothing, or Err with the error that stopped the loop.  An error
 * whose e_error_code is zero is indistinguishable from success here.
 */
template<uint32_t Options, typename F>
Result<string_fragment, matcher::error>
capture_builder::for_each(F func) &&
{
    auto md = this->mb_code.create_match_data();
    auto mat = matcher{this->mb_code, this->mb_input, md};

    matcher::error eret;
    bool done = false;

    while (!done) {
        auto match_res = mat.matches(Options);

        // Each handler returns whether the loop should stop.
        done = match_res.match(
            // `mat` writes its results into `md`, so hand that object to the
            // callback directly rather than copying the matcher into the
            // closure on every iteration.
            [&md, &func](matcher::found) {
                func(md);
                return false;
            },
            [](matcher::not_found) { return true; },
            [&eret](matcher::error err) {
                eret = err;
                return true;
            });
    }

    if (eret.e_error_code == 0) {
        return Ok(md.remaining());
    }
    return Err(eret);
}

}  // namespace pcre2pp
}  // namespace lnav

#endif