summary refs log tree commit diff stats
path: root/third_party/python/giturlparse/giturlparse
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/giturlparse/giturlparse')
-rw-r--r--third_party/python/giturlparse/giturlparse/__init__.py14
-rw-r--r--third_party/python/giturlparse/giturlparse/parser.py69
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/__init__.py18
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/assembla.py14
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/base.py43
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/bitbucket.py20
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/friendcode.py14
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/github.py39
-rw-r--r--third_party/python/giturlparse/giturlparse/platforms/gitlab.py43
-rw-r--r--third_party/python/giturlparse/giturlparse/result.py131
10 files changed, 405 insertions, 0 deletions
diff --git a/third_party/python/giturlparse/giturlparse/__init__.py b/third_party/python/giturlparse/giturlparse/__init__.py
new file mode 100644
index 0000000000..aee86e3750
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/__init__.py
@@ -0,0 +1,14 @@
+from .parser import parse as _parse
+from .result import GitUrlParsed
+
+__author__ = "Iacopo Spalletti"
+__email__ = "i.spalletti@nephila.it"
+__version__ = "0.10.0"
+
+
+def parse(url, check_domain=True):
+    """Parse ``url`` and wrap the raw result in a ``GitUrlParsed`` object.
+
+    :param url: Git URL to parse.
+    :param check_domain: forwarded unchanged to the low-level parser.
+    :return: ``GitUrlParsed`` built from the low-level parser's mapping.
+    """
+    parsed_info = _parse(url, check_domain)
+    return GitUrlParsed(parsed_info)
+
+
+def validate(url, check_domain=True):
+    """Return the ``valid`` flag of the result of parsing ``url``.
+
+    :param url: Git URL to check.
+    :param check_domain: forwarded unchanged to ``parse``.
+    """
+    result = parse(url, check_domain)
+    return result.valid
diff --git a/third_party/python/giturlparse/giturlparse/parser.py b/third_party/python/giturlparse/giturlparse/parser.py
new file mode 100644
index 0000000000..c67f03500d
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/parser.py
@@ -0,0 +1,69 @@
+from collections import defaultdict
+
+from .platforms import PLATFORMS
+
+# Names of the attributes a parse result is expected to carry. Any of them
+# that a pattern does not capture reads as None from the result mapping,
+# because parse() builds that mapping as a defaultdict returning None.
+SUPPORTED_ATTRIBUTES = (
+ "domain",
+ "repo",
+ "owner",
+ "path_raw",
+ "groups_path",
+ "_user",
+ "port",
+ "url",
+ "platform",
+ "protocol",
+)
+
+
+def parse(url, check_domain=True):
+    """Match ``url`` against every known platform pattern and return its parts.
+
+    Platforms are tried in ``PLATFORMS`` order and, within a platform, in the
+    order of its ``COMPILED_PATTERNS``. The first pattern that matches and
+    passes the domain filters wins.
+
+    :param url: Git URL to parse.
+    :param check_domain: when true, a match is discarded if the platform has a
+        ``DOMAINS`` allow-list that does not contain the matched domain, or a
+        ``SKIP_DOMAINS`` deny-list that does. When false, no domain filtering
+        is applied.
+    :return: a ``defaultdict`` whose missing keys read as ``None``. On a match
+        it holds the platform ``DEFAULTS``, the cleaned regex groups, and the
+        ``url``, ``platform`` and ``protocol`` keys. Without a match it only
+        holds the empty-string defaults set below.
+    """
+    # Missing keys read as None; these three start out as empty strings instead.
+    parsed_info = defaultdict(lambda: None)
+    parsed_info.update(port="", path_raw="", groups_path="")
+    # Other attributes are not pre-populated: the defaultdict already yields
+    # None for them on lookup.
+
+    for name, platform in PLATFORMS:
+        for protocol, regex in platform.COMPILED_PATTERNS.items():
+            match = regex.match(url)
+            if not match:
+                continue
+
+            # Skip this pattern if the platform rejects the matched domain.
+            domain = match.group("domain")
+            if check_domain:
+                if platform.DOMAINS and domain not in platform.DOMAINS:
+                    continue
+                if platform.SKIP_DOMAINS and domain in platform.SKIP_DOMAINS:
+                    continue
+
+            # Layer the result: platform defaults first, then the captured
+            # groups (groups that did not participate become ""), then the
+            # bookkeeping keys describing which pattern matched.
+            parsed_info.update(platform.DEFAULTS)
+            parsed_info.update(platform.clean_data(match.groupdict(default="")))
+            parsed_info.update(url=url, platform=name, protocol=protocol)
+            return parsed_info
+
+    # Nothing matched: only the string defaults set above are present.
+    return parsed_info
diff --git a/third_party/python/giturlparse/giturlparse/platforms/__init__.py b/third_party/python/giturlparse/giturlparse/platforms/__init__.py
new file mode 100644
index 0000000000..8add1b7a78
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/__init__.py
@@ -0,0 +1,18 @@
+from .assembla import AssemblaPlatform
+from .base import BasePlatform
+from .bitbucket import BitbucketPlatform
+from .friendcode import FriendCodePlatform
+from .github import GitHubPlatform
+from .gitlab import GitLabPlatform
+
+# Supported platforms, as (name, platform instance) pairs.
+# Order matters: parser.parse() walks this list from the top and returns on
+# the first pattern that matches and passes the domain checks.
+PLATFORMS = [
+ # name -> Platform object
+ ("github", GitHubPlatform()),
+ ("bitbucket", BitbucketPlatform()),
+ ("friendcode", FriendCodePlatform()),
+ ("assembla", AssemblaPlatform()),
+ ("gitlab", GitLabPlatform()),
+ # Generic patterns from BasePlatform, tried last
+ ("base", BasePlatform()),
+]
diff --git a/third_party/python/giturlparse/giturlparse/platforms/assembla.py b/third_party/python/giturlparse/giturlparse/platforms/assembla.py
new file mode 100644
index 0000000000..2624e85954
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/assembla.py
@@ -0,0 +1,14 @@
+from .base import BasePlatform
+
+
+class AssemblaPlatform(BasePlatform):
+ """Patterns and output formats for Assembla-hosted repositories.
+
+ Only the ``ssh`` and ``git`` protocols are defined. Neither pattern has an
+ ``owner`` group: everything between the separator and ``.git`` is captured
+ as ``repo`` (and, identically, as ``pathname``).
+ """
+
+ # Allow-list consulted by the parser when domain checking is enabled.
+ DOMAINS = ("git.assembla.com",)
+ PATTERNS = {
+ "ssh": r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?):(?P<pathname>(?P<repo>.+)).git",
+ "git": r"(?P<protocols>(?P<protocol>git))://(?P<domain>.+?)/(?P<pathname>(?P<repo>.+)).git",
+ }
+ # %-style templates filled from the parsed-URL mapping.
+ FORMATS = {
+ "ssh": r"git@%(domain)s:%(repo)s.git",
+ "git": r"git://%(domain)s/%(repo)s.git",
+ }
+ # Merged into the parse result before the captured groups are applied.
+ DEFAULTS = {"_user": "git"}
diff --git a/third_party/python/giturlparse/giturlparse/platforms/base.py b/third_party/python/giturlparse/giturlparse/platforms/base.py
new file mode 100644
index 0000000000..000726381d
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/base.py
@@ -0,0 +1,43 @@
+import itertools
+import re
+
+
+class BasePlatform:
+    """Generic Git hosting platform; concrete platforms override the tables.
+
+    Class attributes:
+
+    * ``PATTERNS``: protocol name -> regex source used to recognise a URL.
+    * ``FORMATS``: protocol name -> ``%``-style template used to rebuild a URL
+      from the parsed mapping.
+    * ``DOMAINS`` / ``SKIP_DOMAINS``: optional allow-list / deny-list of
+      domains; ``None`` disables the respective check.
+    * ``DEFAULTS``: values merged into a parse result before the captured
+      groups.
+
+    Instances additionally expose ``COMPILED_PATTERNS`` (the patterns compiled
+    case-insensitively) and ``PROTOCOLS`` (the keys of ``PATTERNS``).
+    """
+
+    # Plain templates using only keys the generic patterns below capture.
+    # "_user" is only captured by the ssh pattern.
+    FORMATS = {
+        "ssh": r"%(_user)s@%(domain)s:%(repo)s.git",
+        "http": r"http://%(domain)s/%(repo)s.git",
+        "https": r"https://%(domain)s/%(repo)s.git",
+        "git": r"git://%(domain)s/%(repo)s.git",
+    }
+
+    PATTERNS = {
+        "ssh": r"(?P<_user>.+)@(?P<domain>[^/]+?):(?P<repo>.+).git",
+        "http": r"http://(?P<domain>[^/]+?)/(?P<repo>.+).git",
+        "https": r"https://(?P<domain>[^/]+?)/(?P<repo>.+).git",
+        "git": r"git://(?P<domain>[^/]+?)/(?P<repo>.+).git",
+    }
+
+    # None means it matches all domains
+    DOMAINS = None
+    SKIP_DOMAINS = None
+    DEFAULTS = {}
+
+    def __init__(self):
+        # Precompile PATTERNS
+        self.COMPILED_PATTERNS = {
+            proto: re.compile(regex, re.IGNORECASE) for proto, regex in self.PATTERNS.items()
+        }
+
+        # Supported protocols
+        self.PROTOCOLS = self.PATTERNS.keys()
+
+        # The generic platform itself (not its subclasses) refuses every
+        # domain that one of its direct subclasses lists in SKIP_DOMAINS.
+        # Only subclasses already defined at instantiation time are seen.
+        if type(self) is BasePlatform:
+            skip_lists = [
+                subclass.SKIP_DOMAINS for subclass in type(self).__subclasses__() if subclass.SKIP_DOMAINS
+            ]
+            if skip_lists:
+                self.SKIP_DOMAINS = list(itertools.chain.from_iterable(skip_lists))
+
+    @staticmethod
+    def clean_data(data):
+        """Normalise the regex group dictionary of a match, in place.
+
+        Adds empty ``path`` and ``branch`` entries, turns ``protocols`` (for
+        example ``"git+ssh"``) into a list of its non-empty ``+``-separated
+        parts, and strips ``:`` from both ends of ``pathname``.
+
+        ``protocols`` and ``pathname`` are treated as empty when the matching
+        pattern has no such group, as is the case for the generic patterns of
+        this class.
+
+        :param data: mapping of group name to captured text.
+        :return: the same ``data`` object, updated.
+        """
+        data["path"] = ""
+        data["branch"] = ""
+        data["protocols"] = [part for part in data.get("protocols", "").split("+") if part]
+        data["pathname"] = data.get("pathname", "").strip(":")
+        return data
diff --git a/third_party/python/giturlparse/giturlparse/platforms/bitbucket.py b/third_party/python/giturlparse/giturlparse/platforms/bitbucket.py
new file mode 100644
index 0000000000..baab24466b
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/bitbucket.py
@@ -0,0 +1,20 @@
+from .base import BasePlatform
+
+
+class BitbucketPlatform(BasePlatform):
+ """Patterns and output formats for Bitbucket repositories.
+
+ The ``https`` pattern only matches URLs carrying a ``user@`` part before
+ the domain; the ``ssh`` pattern only matches the ``git`` user. In both,
+ the trailing ``.git`` is optional and the pattern is anchored at the end.
+ """
+
+ PATTERNS = {
+ "https": (
+ r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<_user>.+)@(?P<domain>.+?)"
+ r"(?P<pathname>/(?P<owner>.+)/(?P<repo>.+?)(?:\.git)?)$"
+ ),
+ "ssh": (
+ r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?):"
+ r"(?P<pathname>(?P<owner>.+)/(?P<repo>.+?)(?:\.git)?)$"
+ ),
+ }
+ # Note that the https template puts the owner, not "_user", before the "@".
+ FORMATS = {
+ "https": r"https://%(owner)s@%(domain)s/%(owner)s/%(repo)s.git",
+ "ssh": r"git@%(domain)s:%(owner)s/%(repo)s.git",
+ }
+ # Allow-list consulted by the parser when domain checking is enabled.
+ DOMAINS = ("bitbucket.org",)
+ # Merged into the parse result before the captured groups are applied.
+ DEFAULTS = {"_user": "git"}
diff --git a/third_party/python/giturlparse/giturlparse/platforms/friendcode.py b/third_party/python/giturlparse/giturlparse/platforms/friendcode.py
new file mode 100644
index 0000000000..6de9f17eab
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/friendcode.py
@@ -0,0 +1,14 @@
+from .base import BasePlatform
+
+
+class FriendCodePlatform(BasePlatform):
+ """Pattern and output format for friendco.de repositories.
+
+ Only ``https`` is defined; the URL shape is
+ ``https://<domain>/<owner>@user/<repo>.git``.
+ """
+
+ # Allow-list consulted by the parser when domain checking is enabled.
+ DOMAINS = ("friendco.de",)
+ PATTERNS = {
+ "https": (
+ r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<domain>.+?)/"
+ r"(?P<pathname>(?P<owner>.+)@user/(?P<repo>.+)).git"
+ ),
+ }
+ # %-style template filled from the parsed-URL mapping.
+ FORMATS = {
+ "https": r"https://%(domain)s/%(owner)s@user/%(repo)s.git",
+ }
diff --git a/third_party/python/giturlparse/giturlparse/platforms/github.py b/third_party/python/giturlparse/giturlparse/platforms/github.py
new file mode 100644
index 0000000000..8eb44ef513
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/github.py
@@ -0,0 +1,39 @@
+from .base import BasePlatform
+
+
+class GitHubPlatform(BasePlatform):
+    """Patterns and output formats for GitHub and GitHub Gist URLs.
+
+    Every pattern captures ``owner`` and ``repo`` plus an optional
+    ``path_raw`` suffix that starts with ``/blob/`` or ``/tree/``. The
+    ``.git`` suffix is optional for ``https`` and ``git`` but required by the
+    ``ssh`` pattern.
+    """
+
+    PATTERNS = {
+        "https": (
+            r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<domain>[^/]+?)"
+            r"(?P<pathname>/(?P<owner>[^/]+?)/(?P<repo>[^/]+?)(?:\.git)?(?P<path_raw>(/blob/|/tree/).+)?)$"
+        ),
+        "ssh": (
+            r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?)(?P<pathname>(:|/)"
+            r"(?P<owner>[^/]+)/(?P<repo>[^/]+?)(?:\.git)"
+            r"(?P<path_raw>(/blob/|/tree/).+)?)$"
+        ),
+        "git": (
+            r"(?P<protocols>(?P<protocol>git))://(?P<domain>.+?)"
+            r"(?P<pathname>/(?P<owner>[^/]+)/(?P<repo>[^/]+?)(?:\.git)?"
+            r"(?P<path_raw>(/blob/|/tree/).+)?)$"
+        ),
+    }
+    # %-style templates filled from the parsed-URL mapping.
+    FORMATS = {
+        "https": r"https://%(domain)s/%(owner)s/%(repo)s.git%(path_raw)s",
+        "ssh": r"git@%(domain)s:%(owner)s/%(repo)s.git%(path_raw)s",
+        "git": r"git://%(domain)s/%(owner)s/%(repo)s.git%(path_raw)s",
+    }
+    # Allow-list consulted by the parser when domain checking is enabled.
+    DOMAINS = (
+        "github.com",
+        "gist.github.com",
+    )
+    # Merged into the parse result before the captured groups are applied.
+    DEFAULTS = {"_user": "git"}
+
+    @staticmethod
+    def clean_data(data):
+        """Apply the base clean-up, then derive ``path`` / ``branch``.
+
+        ``path`` is what follows a leading ``/blob/`` in ``path_raw`` and
+        ``branch`` is what follows a leading ``/tree/``. Only the leading
+        marker is removed, so a later ``/blob/`` or ``/tree/`` segment inside
+        the path is kept intact.
+
+        :param data: mapping of group name to captured text.
+        :return: the same ``data`` object, updated.
+        """
+        data = BasePlatform.clean_data(data)
+        path_raw = data["path_raw"]
+        if path_raw.startswith("/blob/"):
+            data["path"] = path_raw[len("/blob/"):]
+        if path_raw.startswith("/tree/"):
+            data["branch"] = path_raw[len("/tree/"):]
+        return data
diff --git a/third_party/python/giturlparse/giturlparse/platforms/gitlab.py b/third_party/python/giturlparse/giturlparse/platforms/gitlab.py
new file mode 100644
index 0000000000..38b37efb23
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/platforms/gitlab.py
@@ -0,0 +1,43 @@
+from .base import BasePlatform
+
+
+class GitLabPlatform(BasePlatform):
+    """Patterns and output formats for GitLab-style URLs on any domain.
+
+    There is no ``DOMAINS`` allow-list; the GitHub domains are instead listed
+    in ``SKIP_DOMAINS``. Besides ``owner`` and ``repo`` the patterns capture
+    an optional ``port``, an optional ``groups_path`` (the segments between
+    owner and repo) and an optional ``path_raw`` suffix starting with
+    ``/blob/`` or ``/-/tree/``.
+    """
+
+    PATTERNS = {
+        "https": (
+            r"(?P<protocols>(git\+)?(?P<protocol>https))://(?P<domain>.+?)(?P<port>:[0-9]+)?"
+            r"(?P<pathname>/(?P<owner>[^/]+?)/"
+            r"(?P<groups_path>.*?)?(?(groups_path)/)?(?P<repo>[^/]+?)(?:\.git)?"
+            r"(?P<path_raw>(/blob/|/-/tree/).+)?)$"
+        ),
+        "ssh": (
+            r"(?P<protocols>(git\+)?(?P<protocol>ssh))?(://)?git@(?P<domain>.+?):(?P<port>[0-9]+)?(?(port))?"
+            r"(?P<pathname>/?(?P<owner>[^/]+)/"
+            r"(?P<groups_path>.*?)?(?(groups_path)/)?(?P<repo>[^/]+?)(?:\.git)?"
+            r"(?P<path_raw>(/blob/|/-/tree/).+)?)$"
+        ),
+        "git": (
+            r"(?P<protocols>(?P<protocol>git))://(?P<domain>.+?):(?P<port>[0-9]+)?(?(port))?"
+            r"(?P<pathname>/?(?P<owner>[^/]+)/"
+            r"(?P<groups_path>.*?)?(?(groups_path)/)?(?P<repo>[^/]+?)(?:\.git)?"
+            r"(?P<path_raw>(/blob/|/-/tree/).+)?)$"
+        ),
+    }
+    # %-style templates; "port_slash" and "groups_slash" are not regex groups
+    # and must be supplied by whoever fills the template.
+    FORMATS = {
+        "https": r"https://%(domain)s/%(owner)s/%(groups_slash)s%(repo)s.git%(path_raw)s",
+        "ssh": r"git@%(domain)s:%(port_slash)s%(owner)s/%(groups_slash)s%(repo)s.git%(path_raw)s",
+        "git": r"git://%(domain)s%(port)s/%(owner)s/%(groups_slash)s%(repo)s.git%(path_raw)s",
+    }
+    # Deny-list consulted by the parser when domain checking is enabled.
+    SKIP_DOMAINS = (
+        "github.com",
+        "gist.github.com",
+    )
+    # Merged into the parse result before the captured groups are applied.
+    DEFAULTS = {"_user": "git", "port": ""}
+
+    @staticmethod
+    def clean_data(data):
+        """Apply the base clean-up, then derive ``path`` / ``branch``.
+
+        ``path`` is what follows a leading ``/blob/`` in ``path_raw`` and
+        ``branch`` is what follows a leading ``/-/tree/``. Only the leading
+        marker is removed, so a later occurrence of the marker inside the
+        path is kept intact.
+
+        :param data: mapping of group name to captured text.
+        :return: the same ``data`` object, updated.
+        """
+        data = BasePlatform.clean_data(data)
+        path_raw = data["path_raw"]
+        if path_raw.startswith("/blob/"):
+            data["path"] = path_raw[len("/blob/"):]
+        if path_raw.startswith("/-/tree/"):
+            data["branch"] = path_raw[len("/-/tree/"):]
+        return data
diff --git a/third_party/python/giturlparse/giturlparse/result.py b/third_party/python/giturlparse/giturlparse/result.py
new file mode 100644
index 0000000000..4a33136c51
--- /dev/null
+++ b/third_party/python/giturlparse/giturlparse/result.py
@@ -0,0 +1,131 @@
+from copy import copy
+
+from .platforms import PLATFORMS
+
+# Attributes that must all be present and truthy on a GitUrlParsed instance
+# for it to report itself as valid.
+REQUIRED_ATTRIBUTES = (
+ "domain",
+ "repo",
+)
+
+
+class GitUrlParsed:
+    """Attribute-style view over the mapping produced by the URL parser.
+
+    Every key of the mapping becomes an instance attribute. On top of that
+    the class offers alias properties, per-platform boolean flags, and
+    helpers that rebuild the URL for another protocol using the ``FORMATS``
+    templates of the matched platform.
+    """
+
+    platform = None
+
+    def __init__(self, parsed_info):
+        """Store ``parsed_info`` and expose each of its keys as an attribute."""
+        self._parsed = parsed_info
+
+        for key, value in parsed_info.items():
+            setattr(self, key, value)
+
+        # Remember the platform object registered under the parsed platform
+        # name; nothing is stored when no registered name equals it.
+        for registered_name, registered_platform in PLATFORMS:
+            if registered_name == self.platform:
+                self._platform_obj = registered_platform
+                break
+
+    def _valid_attrs(self):
+        """Return True when every required attribute is present and truthy."""
+        return all(getattr(self, attr, None) for attr in REQUIRED_ATTRIBUTES)
+
+    @property
+    def valid(self):
+        """Whether the parsed URL carries all required attributes."""
+        return self._valid_attrs()
+
+    ##
+    # Alias properties
+    ##
+    @property
+    def host(self):
+        """Alias of ``domain``."""
+        return self.domain
+
+    @property
+    def resource(self):
+        """Alias of ``domain``."""
+        return self.domain
+
+    @property
+    def name(self):
+        """Alias of ``repo``."""
+        return self.repo
+
+    @property
+    def user(self):
+        """The ``_user`` attribute when it exists, otherwise ``owner``."""
+        try:
+            return self._user
+        except AttributeError:
+            return self.owner
+
+    @property
+    def groups(self):
+        """``groups_path`` split on ``/``; an empty list when it is empty."""
+        return self.groups_path.split("/") if self.groups_path else []
+
+    def format(self, protocol):  # noqa : A0003
+        """Reformat URL to protocol."""
+        port_slash = ""
+        if self.port:
+            port_slash = "%s/" % self.port
+
+        groups_slash = ""
+        if self.groups_path:
+            groups_slash = "%s/" % self.groups_path
+
+        # Work on a shallow copy so the stored mapping is not altered.
+        values = copy(self._parsed)
+        values["port_slash"] = port_slash
+        values["groups_slash"] = groups_slash
+
+        template = self._platform_obj.FORMATS[protocol]
+        return template % values
+
+    @property
+    def normalized(self):
+        """Normalize URL."""
+        return self.format(self.protocol)
+
+    ##
+    # Rewriting
+    ##
+    @property
+    def url2ssh(self):
+        """The URL rewritten with the platform's ``ssh`` template."""
+        return self.format("ssh")
+
+    @property
+    def url2http(self):
+        """The URL rewritten with the platform's ``http`` template."""
+        return self.format("http")
+
+    @property
+    def url2https(self):
+        """The URL rewritten with the platform's ``https`` template."""
+        return self.format("https")
+
+    @property
+    def url2git(self):
+        """The URL rewritten with the platform's ``git`` template."""
+        return self.format("git")
+
+    # All supported Urls for a repo
+    @property
+    def urls(self):
+        """Mapping of each platform protocol to the URL rewritten for it."""
+        rewritten = {}
+        for protocol in self._platform_obj.PROTOCOLS:
+            rewritten[protocol] = self.format(protocol)
+        return rewritten
+
+    ##
+    # Platforms
+    ##
+    @property
+    def github(self):
+        """True when the matched platform name is ``github``."""
+        return self.platform == "github"
+
+    @property
+    def bitbucket(self):
+        """True when the matched platform name is ``bitbucket``."""
+        return self.platform == "bitbucket"
+
+    @property
+    def friendcode(self):
+        """True when the matched platform name is ``friendcode``."""
+        return self.platform == "friendcode"
+
+    @property
+    def assembla(self):
+        """True when the matched platform name is ``assembla``."""
+        return self.platform == "assembla"
+
+    @property
+    def gitlab(self):
+        """True when the matched platform name is ``gitlab``."""
+        return self.platform == "gitlab"
+
+    ##
+    # Get data as dict
+    ##
+    @property
+    def data(self):
+        """A plain ``dict`` copy of the parsed mapping."""
+        return dict(self._parsed)