diff options
Diffstat (limited to 'third_party/python/giturlparse/giturlparse')
10 files changed, 405 insertions, 0 deletions
diff --git a/third_party/python/giturlparse/giturlparse/__init__.py b/third_party/python/giturlparse/giturlparse/__init__.py new file mode 100644 index 0000000000..aee86e3750 --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/__init__.py @@ -0,0 +1,14 @@ +from .parser import parse as _parse +from .result import GitUrlParsed + +__author__ = "Iacopo Spalletti" +__email__ = "i.spalletti@nephila.it" +__version__ = "0.10.0" + + +def parse(url, check_domain=True): + return GitUrlParsed(_parse(url, check_domain)) + + +def validate(url, check_domain=True): + return parse(url, check_domain).valid diff --git a/third_party/python/giturlparse/giturlparse/parser.py b/third_party/python/giturlparse/giturlparse/parser.py new file mode 100644 index 0000000000..c67f03500d --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/parser.py @@ -0,0 +1,69 @@ +from collections import defaultdict + +from .platforms import PLATFORMS + +SUPPORTED_ATTRIBUTES = ( + "domain", + "repo", + "owner", + "path_raw", + "groups_path", + "_user", + "port", + "url", + "platform", + "protocol", +) + + +def parse(url, check_domain=True): + # Values are None by default + parsed_info = defaultdict(lambda: None) + parsed_info["port"] = "" + parsed_info["path_raw"] = "" + parsed_info["groups_path"] = "" + + # Defaults to all attributes + map(parsed_info.setdefault, SUPPORTED_ATTRIBUTES) + + for name, platform in PLATFORMS: + for protocol, regex in platform.COMPILED_PATTERNS.items(): + # print(name, protocol, regex) + # Match current regex against URL + match = regex.match(url) + + # Skip if not matched + if not match: + # print("[%s] URL: %s dit not match %s" % (name, url, regex.pattern)) + continue + + # Skip if domain is bad + domain = match.group("domain") + # print('[%s] DOMAIN = %s' % (url, domain,)) + if check_domain: + if platform.DOMAINS and not (domain in platform.DOMAINS): + continue + if platform.SKIP_DOMAINS and domain in platform.SKIP_DOMAINS: + continue + + # add in platform defaults + parsed_info.update(platform.DEFAULTS) + + # Get matches as dictionary + matches = platform.clean_data(match.groupdict(default="")) + + # Update info with matches + parsed_info.update(matches) + + # Update info with platform info + parsed_info.update( + { + "url": url, + "platform": name, + "protocol": protocol, + } + ) + return parsed_info + + # Empty if none matched + return parsed_info diff --git a/third_party/python/giturlparse/giturlparse/platforms/__init__.py b/third_party/python/giturlparse/giturlparse/platforms/__init__.py new file mode 100644 index 0000000000..8add1b7a78 --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/__init__.py @@ -0,0 +1,18 @@ +from .assembla import AssemblaPlatform +from .base import BasePlatform +from .bitbucket import BitbucketPlatform +from .friendcode import FriendCodePlatform +from .github import GitHubPlatform +from .gitlab import GitLabPlatform + +# Supported platforms +PLATFORMS = [ + # name -> Platform object + ("github", GitHubPlatform()), + ("bitbucket", BitbucketPlatform()), + ("friendcode", FriendCodePlatform()), + ("assembla", AssemblaPlatform()), + ("gitlab", GitLabPlatform()), + # Match url + ("base", BasePlatform()), +] diff --git a/third_party/python/giturlparse/giturlparse/platforms/assembla.py b/third_party/python/giturlparse/giturlparse/platforms/assembla.py new file mode 100644 index 0000000000..2624e85954 --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/assembla.py @@ -0,0 +1,14 @@ +from .base import BasePlatform + + +class AssemblaPlatform(BasePlatform): + DOMAINS = ("git.assembla.com",) + PATTERNS = { + "ssh": r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?):(?P<pathname>(?P<repo>.+)).git", + "git": r"(?P<protocols>(?P<protocol>git))://(?P<domain>.+?)/(?P<pathname>(?P<repo>.+)).git", + } + FORMATS = { + "ssh": r"git@%(domain)s:%(repo)s.git", + "git": r"git://%(domain)s/%(repo)s.git", + } + DEFAULTS = {"_user": "git"} diff --git a/third_party/python/giturlparse/giturlparse/platforms/base.py b/third_party/python/giturlparse/giturlparse/platforms/base.py new file mode 100644 index 0000000000..000726381d --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/base.py @@ -0,0 +1,43 @@ +import itertools +import re + + +class BasePlatform: + FORMATS = { + "ssh": r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?%(_user)s@%(host)s:%(repo)s.git", + "http": r"(?P<protocols>(git\+)?(?P<protocol>http))://%(host)s/%(repo)s.git", + "https": r"(?P<protocols>(git\+)?(?P<protocol>https))://%(host)s/%(repo)s.git", + "git": r"(?P<protocols>(?P<protocol>git))://%(host)s/%(repo)s.git", + } + + PATTERNS = { + "ssh": r"(?P<_user>.+)@(?P<domain>[^/]+?):(?P<repo>.+).git", + "http": r"http://(?P<domain>[^/]+?)/(?P<repo>.+).git", + "https": r"https://(?P<domain>[^/]+?)/(?P<repo>.+).git", + "git": r"git://(?P<domain>[^/]+?)/(?P<repo>.+).git", + } + + # None means it matches all domains + DOMAINS = None + SKIP_DOMAINS = None + DEFAULTS = {} + + def __init__(self): + # Precompile PATTERNS + self.COMPILED_PATTERNS = {proto: re.compile(regex, re.IGNORECASE) for proto, regex in self.PATTERNS.items()} + + # Supported protocols + self.PROTOCOLS = self.PATTERNS.keys() + + if self.__class__ == BasePlatform: + sub = [subclass.SKIP_DOMAINS for subclass in self.__class__.__subclasses__() if subclass.SKIP_DOMAINS] + if sub: + self.SKIP_DOMAINS = list(itertools.chain.from_iterable(sub)) + + @staticmethod + def clean_data(data): + data["path"] = "" + data["branch"] = "" + data["protocols"] = list(filter(lambda x: x, data["protocols"].split("+"))) + data["pathname"] = data["pathname"].strip(":") + return data diff --git a/third_party/python/giturlparse/giturlparse/platforms/bitbucket.py b/third_party/python/giturlparse/giturlparse/platforms/bitbucket.py new file mode 100644 index 0000000000..baab24466b --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/bitbucket.py @@ -0,0 +1,20 @@ +from .base import BasePlatform + + +class BitbucketPlatform(BasePlatform): + PATTERNS = { + "https": ( + r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<_user>.+)@(?P<domain>.+?)" + r"(?P<pathname>/(?P<owner>.+)/(?P<repo>.+?)(?:\.git)?)$" + ), + "ssh": ( + r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?):" + r"(?P<pathname>(?P<owner>.+)/(?P<repo>.+?)(?:\.git)?)$" + ), + } + FORMATS = { + "https": r"https://%(owner)s@%(domain)s/%(owner)s/%(repo)s.git", + "ssh": r"git@%(domain)s:%(owner)s/%(repo)s.git", + } + DOMAINS = ("bitbucket.org",) + DEFAULTS = {"_user": "git"} diff --git a/third_party/python/giturlparse/giturlparse/platforms/friendcode.py b/third_party/python/giturlparse/giturlparse/platforms/friendcode.py new file mode 100644 index 0000000000..6de9f17eab --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/friendcode.py @@ -0,0 +1,14 @@ +from .base import BasePlatform + + +class FriendCodePlatform(BasePlatform): + DOMAINS = ("friendco.de",) + PATTERNS = { + "https": ( + r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<domain>.+?)/" + r"(?P<pathname>(?P<owner>.+)@user/(?P<repo>.+)).git" + ), + } + FORMATS = { + "https": r"https://%(domain)s/%(owner)s@user/%(repo)s.git", + } diff --git a/third_party/python/giturlparse/giturlparse/platforms/github.py b/third_party/python/giturlparse/giturlparse/platforms/github.py new file mode 100644 index 0000000000..8eb44ef513 --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/github.py @@ -0,0 +1,39 @@ +from .base import BasePlatform + + +class GitHubPlatform(BasePlatform): + PATTERNS = { + "https": ( + r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<domain>[^/]+?)" + r"(?P<pathname>/(?P<owner>[^/]+?)/(?P<repo>[^/]+?)(?:\.git)?(?P<path_raw>(/blob/|/tree/).+)?)$" + ), + "ssh": ( + r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?)(?P<pathname>(:|/)" + r"(?P<owner>[^/]+)/(?P<repo>[^/]+?)(?:\.git)" + r"(?P<path_raw>(/blob/|/tree/).+)?)$" + ), + "git": ( + r"(?P<protocols>(?P<protocol>git))://(?P<domain>.+?)" + r"(?P<pathname>/(?P<owner>[^/]+)/(?P<repo>[^/]+?)(?:\.git)?" + r"(?P<path_raw>(/blob/|/tree/).+)?)$" + ), + } + FORMATS = { + "https": r"https://%(domain)s/%(owner)s/%(repo)s.git%(path_raw)s", + "ssh": r"git@%(domain)s:%(owner)s/%(repo)s.git%(path_raw)s", + "git": r"git://%(domain)s/%(owner)s/%(repo)s.git%(path_raw)s", + } + DOMAINS = ( + "github.com", + "gist.github.com", + ) + DEFAULTS = {"_user": "git"} + + @staticmethod + def clean_data(data): + data = BasePlatform.clean_data(data) + if data["path_raw"].startswith("/blob/"): + data["path"] = data["path_raw"].replace("/blob/", "") + if data["path_raw"].startswith("/tree/"): + data["branch"] = data["path_raw"].replace("/tree/", "") + return data diff --git a/third_party/python/giturlparse/giturlparse/platforms/gitlab.py b/third_party/python/giturlparse/giturlparse/platforms/gitlab.py new file mode 100644 index 0000000000..38b37efb23 --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/platforms/gitlab.py @@ -0,0 +1,43 @@ +from .base import BasePlatform + + +class GitLabPlatform(BasePlatform): + PATTERNS = { + "https": ( + r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<domain>.+?)(?P<port>:[0-9]+)?" + r"(?P<pathname>/(?P<owner>[^/]+?)/" + r"(?P<groups_path>.*?)?(?(groups_path)/)?(?P<repo>[^/]+?)(?:\.git)?" + r"(?P<path_raw>(/blob/|/-/tree/).+)?)$" + ), + "ssh": ( + r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?):(?P<port>[0-9]+)?(?(port))?" + r"(?P<pathname>/?(?P<owner>[^/]+)/" + r"(?P<groups_path>.*?)?(?(groups_path)/)?(?P<repo>[^/]+?)(?:\.git)?" + r"(?P<path_raw>(/blob/|/-/tree/).+)?)$" + ), + "git": ( + r"(?P<protocols>(?P<protocol>git))://(?P<domain>.+?):(?P<port>[0-9]+)?(?(port))?" + r"(?P<pathname>/?(?P<owner>[^/]+)/" + r"(?P<groups_path>.*?)?(?(groups_path)/)?(?P<repo>[^/]+?)(?:\.git)?" + r"(?P<path_raw>(/blob/|/-/tree/).+)?)$" + ), + } + FORMATS = { + "https": r"https://%(domain)s/%(owner)s/%(groups_slash)s%(repo)s.git%(path_raw)s", + "ssh": r"git@%(domain)s:%(port_slash)s%(owner)s/%(groups_slash)s%(repo)s.git%(path_raw)s", + "git": r"git://%(domain)s%(port)s/%(owner)s/%(groups_slash)s%(repo)s.git%(path_raw)s", + } + SKIP_DOMAINS = ( + "github.com", + "gist.github.com", + ) + DEFAULTS = {"_user": "git", "port": ""} + + @staticmethod + def clean_data(data): + data = BasePlatform.clean_data(data) + if data["path_raw"].startswith("/blob/"): + data["path"] = data["path_raw"].replace("/blob/", "") + if data["path_raw"].startswith("/-/tree/"): + data["branch"] = data["path_raw"].replace("/-/tree/", "") + return data diff --git a/third_party/python/giturlparse/giturlparse/result.py b/third_party/python/giturlparse/giturlparse/result.py new file mode 100644 index 0000000000..4a33136c51 --- /dev/null +++ b/third_party/python/giturlparse/giturlparse/result.py @@ -0,0 +1,131 @@ +from copy import copy + +from .platforms import PLATFORMS + +# Possible values to extract from a Git Url +REQUIRED_ATTRIBUTES = ( + "domain", + "repo", +) + + +class GitUrlParsed: + platform = None + + def __init__(self, parsed_info): + self._parsed = parsed_info + + # Set parsed objects as attributes + for k, v in parsed_info.items(): + setattr(self, k, v) + + for name, platform in PLATFORMS: + if name == self.platform: + self._platform_obj = platform + break + + def _valid_attrs(self): + return all([getattr(self, attr, None) for attr in REQUIRED_ATTRIBUTES]) # NOQA + + @property + def valid(self): + return all( + [ + self._valid_attrs(), + ] + ) + + ## + # Alias properties + ## + @property + def host(self): + return self.domain + + @property + def resource(self): + return self.domain + + @property + def name(self): + return self.repo + + @property + def user(self): + if hasattr(self, "_user"): + return self._user + + return self.owner + + @property + def groups(self): + if self.groups_path: + return self.groups_path.split("/") + else: + return [] + + def format(self, protocol): # noqa : A0003 + """Reformat URL to protocol.""" + items = copy(self._parsed) + items["port_slash"] = "%s/" % self.port if self.port else "" + items["groups_slash"] = "%s/" % self.groups_path if self.groups_path else "" + return self._platform_obj.FORMATS[protocol] % items + + @property + def normalized(self): + """Normalize URL.""" + return self.format(self.protocol) + + ## + # Rewriting + ## + @property + def url2ssh(self): + return self.format("ssh") + + @property + def url2http(self): + return self.format("http") + + @property + def url2https(self): + return self.format("https") + + @property + def url2git(self): + return self.format("git") + + # All supported Urls for a repo + @property + def urls(self): + return {protocol: self.format(protocol) for protocol in self._platform_obj.PROTOCOLS} + + ## + # Platforms + ## + @property + def github(self): + return self.platform == "github" + + @property + def bitbucket(self): + return self.platform == "bitbucket" + + @property + def friendcode(self): + return self.platform == "friendcode" + + @property + def assembla(self): + return self.platform == "assembla" + + @property + def gitlab(self): + return self.platform == "gitlab" + + ## + # Get data as dict + ## + @property + def data(self): + return dict(self._parsed) |