summaryrefslogtreecommitdiffstats
path: root/gitlint/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'gitlint/utils.py')
-rw-r--r--gitlint/utils.py33
1 files changed, 28 insertions, 5 deletions
diff --git a/gitlint/utils.py b/gitlint/utils.py
index c418347..89015e7 100644
--- a/gitlint/utils.py
+++ b/gitlint/utils.py
@@ -1,4 +1,5 @@
# pylint: disable=bad-option-value,unidiomatic-typecheck,undefined-variable,no-else-return
+import codecs
import platform
import sys
import os
@@ -24,6 +25,16 @@ def platform_is_windows():
PLATFORM_IS_WINDOWS = platform_is_windows()
########################################################################################################################
+# IS_PY2
+
+
+def is_py2():
+ return sys.version_info[0] == 2
+
+
+IS_PY2 = is_py2()
+
+########################################################################################################################
# USE_SH_LIB
# Determine whether to use the `sh` library
# On windows we won't want to use the sh library since it's not supported - instead we'll use our own shell module.
@@ -46,13 +57,14 @@ USE_SH_LIB = use_sh_library()
def getpreferredencoding():
""" Modified version of local.getpreferredencoding() that takes into account LC_ALL, LC_CTYPE, LANG env vars
on windows and falls back to UTF-8. """
- default_encoding = locale.getpreferredencoding() or "UTF-8"
+ fallback_encoding = "UTF-8"
+ default_encoding = locale.getpreferredencoding() or fallback_encoding
# On Windows, we mimic git/linux by trying to read the LC_ALL, LC_CTYPE, LANG env vars manually
# (on Linux/MacOS the `getpreferredencoding()` call will take care of this).
# We fallback to UTF-8
if PLATFORM_IS_WINDOWS:
- default_encoding = "UTF-8"
+ default_encoding = fallback_encoding
for env_var in ["LC_ALL", "LC_CTYPE", "LANG"]:
encoding = os.environ.get(env_var, False)
if encoding:
@@ -65,6 +77,15 @@ def getpreferredencoding():
default_encoding = encoding
break
+ # We've determined what encoding the user *wants*, let's now check if it's actually a valid encoding on the
+ # system. If not, fallback to UTF-8.
+ # This scenario is fairly common on Windows where git sets LC_CTYPE=C when invoking the commit-msg hook, which
+ # is not a valid encoding in Python on Windows.
+ try:
+ codecs.lookup(default_encoding)
+ except LookupError:
+ default_encoding = fallback_encoding
+
return default_encoding
@@ -76,7 +97,7 @@ DEFAULT_ENCODING = getpreferredencoding()
def ustr(obj):
""" Python 2 and 3 utility method that converts an obj to unicode in python 2 and to a str object in python 3"""
- if sys.version_info[0] == 2:
+ if IS_PY2:
# If we are getting a string, then do an explicit decode
# else, just call the unicode method of the object
if type(obj) in [str, basestring]: # pragma: no cover # noqa
@@ -94,11 +115,13 @@ def sstr(obj):
""" Python 2 and 3 utility method that converts an obj to a DEFAULT_ENCODING encoded string in python 2
and to unicode in python 3.
Especially useful for implementing __str__ methods in python 2: http://stackoverflow.com/a/1307210/381010"""
- if sys.version_info[0] == 2:
- # For lists in python2, remove unicode string representation characters.
+ if IS_PY2:
+ # For lists and tuples in python2, remove unicode string representation characters.
# i.e. ensure lists are printed as ['a', 'b'] and not [u'a', u'b']
if type(obj) in [list]:
return [sstr(item) for item in obj] # pragma: no cover # noqa
+ elif type(obj) in [tuple]:
+ return tuple(sstr(item) for item in obj) # pragma: no cover # noqa
return unicode(obj).encode(DEFAULT_ENCODING) # pragma: no cover # noqa
else: