summaryrefslogtreecommitdiffstats
path: root/third_party/python/giturlparse/giturlparse/parser.py
blob: c67f03500de43485a360e0d52de7ba70971641d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from collections import defaultdict

from .platforms import PLATFORMS

# Names of the attributes a parsed-URL result can carry.
SUPPORTED_ATTRIBUTES = (
    # Presumably supplied by the platform regex groups / platform defaults;
    # `parse` reads "domain" from the match and seeds "port", "path_raw" and
    # "groups_path" with empty strings.
    "domain", "repo", "owner", "path_raw", "groups_path", "_user", "port",
    # Written by `parse` itself once a pattern has matched.
    "url", "platform", "protocol",
)


def parse(url, check_domain=True):
    """Match ``url`` against the known platform patterns and return its parts.

    Platforms are tried in ``PLATFORMS`` order and, within a platform, the
    entries of ``COMPILED_PATTERNS`` in iteration order. The first pattern
    that matches and passes the domain check wins.

    Args:
        url: The URL string to parse.
        check_domain: When true (the default), a regex match is rejected if
            the platform declares a non-empty ``DOMAINS`` that does not
            contain the matched domain, or a non-empty ``SKIP_DOMAINS`` that
            does contain it.

    Returns:
        A ``defaultdict`` whose missing keys read as ``None``. It always
        holds an entry for every name in ``SUPPORTED_ATTRIBUTES``: ``port``,
        ``path_raw`` and ``groups_path`` default to ``""``, the others to
        ``None``. On a successful match these are overlaid, in order, with
        the platform's ``DEFAULTS``, the cleaned regex groups, and the
        ``url`` / ``platform`` / ``protocol`` values. If nothing matches,
        only the defaults are returned.
    """
    parsed_info = defaultdict(lambda: None)
    parsed_info["port"] = ""
    parsed_info["path_raw"] = ""
    parsed_info["groups_path"] = ""

    # Give every supported attribute an explicit entry. This has to be a real
    # loop: on Python 3 ``map()`` is lazy, so a bare
    # ``map(parsed_info.setdefault, SUPPORTED_ATTRIBUTES)`` statement never
    # calls ``setdefault`` and leaves the keys absent.
    # NOTE(review): callers that test for key presence (``in``, ``.get`` with
    # a fallback, iteration) will now see all supported attributes.
    for attribute in SUPPORTED_ATTRIBUTES:
        parsed_info.setdefault(attribute)

    for name, platform in PLATFORMS:
        for protocol, regex in platform.COMPILED_PATTERNS.items():
            match = regex.match(url)
            if not match:
                continue

            # Every pattern is expected to define a "domain" group.
            domain = match.group("domain")
            if check_domain:
                # Allow-list: only enforced when the platform declares one.
                if platform.DOMAINS and domain not in platform.DOMAINS:
                    continue
                # Deny-list: domains this platform must not claim.
                if platform.SKIP_DOMAINS and domain in platform.SKIP_DOMAINS:
                    continue

            # Layering order matters: platform defaults first, then the
            # values captured from the URL (unmatched groups become ""),
            # then the bookkeeping fields describing how the URL matched.
            parsed_info.update(platform.DEFAULTS)
            parsed_info.update(platform.clean_data(match.groupdict(default="")))
            parsed_info.update(
                {
                    "url": url,
                    "platform": name,
                    "protocol": protocol,
                }
            )
            return parsed_info

    # Nothing matched: return the defaults only.
    return parsed_info