summaryrefslogtreecommitdiffstats
path: root/src/extract-uri.cpp
blob: 9bd39c0d9043255a2a6392e48d2f748653aa61a1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// SPDX-License-Identifier: GPL-2.0-or-later
/** @file
 * Helpers for extracting the address from a CSS `url(...)` value.
 *//*
 * Authors: see git history
 *
 * Copyright (C) 2018 Authors
 * Released under GNU GPL v2+, read the file 'COPYING' for more information.
 */
#include <cstring>
#include <glib.h>
#include <optional>

#include "extract-uri.h"

// FIXME: kill this ugliness when we have a proper CSS parser

// Functions as per 4.3.4 of CSS 2.1
// http://www.w3.org/TR/CSS21/syndata.html#uri
std::string extract_uri(char const *s, char const **endptr)
{
    std::string result;

    if (!s)
        return result;

    gchar const *sb = s;
    if ( strlen(sb) < 4 || strncmp(sb, "url", 3) != 0 ) {
        return result;
    }

    sb += 3;

    if ( endptr ) {
        *endptr = nullptr;
    }

    // This first whitespace technically is not allowed.
    // Just left in for now for legacy behavior.
    while ( ( *sb == ' ' ) ||
            ( *sb == '\t' ) )
    {
        sb++;
    }

    if ( *sb == '(' ) {
        sb++;
        while ( ( *sb == ' ' ) ||
                ( *sb == '\t' ) )
        {
            sb++;
        }

        gchar delim = ')';
        if ( (*sb == '\'' || *sb == '"') ) {
            delim = *sb;
            sb++;
        }

        if (!*sb) {
            return result;
        }

        gchar const* se = sb;
        while ( *se && (*se != delim) ) {
            se++;
        }

        // we found the delimiter
        if ( *se ) {
            if ( delim == ')' ) {
                if ( endptr ) {
                    *endptr = se + 1;
                }

                // back up for any trailing whitespace
                while (se > sb && g_ascii_isspace(se[-1]))
                {
                    se--;
                }

                result = std::string(sb, se);
            } else {
                gchar const* tail = se + 1;
                while ( ( *tail == ' ' ) ||
                        ( *tail == '\t' ) )
                {
                    tail++;
                }
                if ( *tail == ')' ) {
                    if ( endptr ) {
                        *endptr = tail + 1;
                    }
                    result = std::string(sb, se);
                }
            }
        }
    }

    return result;
}

std::optional<std::string> try_extract_uri(const char* url) {
    auto link = extract_uri(url);
    return link.empty() ? std::nullopt : std::make_optional(link);
}

std::optional<std::string> try_extract_uri_id(const char *url) {
    if (auto ret = try_extract_uri(url)) {
        if (!ret->empty() && (*ret)[0] == '#') {
            ret->erase(0, 1);
            return ret;
        }
    }
    return std::nullopt;
}

/*
  Local Variables:
  mode:c++
  c-file-style:"stroustrup"
  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
  indent-tabs-mode:nil
  fill-column:99
  End:
*/
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:fileencoding=utf-8:textwidth=99 :