diff options
Diffstat (limited to '')
-rw-r--r-- | gitlint/utils.py | 33 |
1 files changed, 28 insertions, 5 deletions
diff --git a/gitlint/utils.py b/gitlint/utils.py index c418347..89015e7 100644 --- a/gitlint/utils.py +++ b/gitlint/utils.py @@ -1,4 +1,5 @@ # pylint: disable=bad-option-value,unidiomatic-typecheck,undefined-variable,no-else-return +import codecs import platform import sys import os @@ -24,6 +25,16 @@ def platform_is_windows(): PLATFORM_IS_WINDOWS = platform_is_windows() ######################################################################################################################## +# IS_PY2 + + +def is_py2(): + return sys.version_info[0] == 2 + + +IS_PY2 = is_py2() + +######################################################################################################################## # USE_SH_LIB # Determine whether to use the `sh` library # On windows we won't want to use the sh library since it's not supported - instead we'll use our own shell module. @@ -46,13 +57,14 @@ USE_SH_LIB = use_sh_library() def getpreferredencoding(): """ Modified version of local.getpreferredencoding() that takes into account LC_ALL, LC_CTYPE, LANG env vars on windows and falls back to UTF-8. """ - default_encoding = locale.getpreferredencoding() or "UTF-8" + fallback_encoding = "UTF-8" + default_encoding = locale.getpreferredencoding() or fallback_encoding # On Windows, we mimic git/linux by trying to read the LC_ALL, LC_CTYPE, LANG env vars manually # (on Linux/MacOS the `getpreferredencoding()` call will take care of this). # We fallback to UTF-8 if PLATFORM_IS_WINDOWS: - default_encoding = "UTF-8" + default_encoding = fallback_encoding for env_var in ["LC_ALL", "LC_CTYPE", "LANG"]: encoding = os.environ.get(env_var, False) if encoding: @@ -65,6 +77,15 @@ def getpreferredencoding(): default_encoding = encoding break + # We've determined what encoding the user *wants*, let's now check if it's actually a valid encoding on the + # system. If not, fallback to UTF-8. + # This scenario is fairly common on Windows where git sets LC_CTYPE=C when invoking the commit-msg hook, which + # is not a valid encoding in Python on Windows. + try: + codecs.lookup(default_encoding) + except LookupError: + default_encoding = fallback_encoding + return default_encoding @@ -76,7 +97,7 @@ DEFAULT_ENCODING = getpreferredencoding() def ustr(obj): """ Python 2 and 3 utility method that converts an obj to unicode in python 2 and to a str object in python 3""" - if sys.version_info[0] == 2: + if IS_PY2: # If we are getting a string, then do an explicit decode # else, just call the unicode method of the object if type(obj) in [str, basestring]: # pragma: no cover # noqa @@ -94,11 +115,13 @@ def sstr(obj): """ Python 2 and 3 utility method that converts an obj to a DEFAULT_ENCODING encoded string in python 2 and to unicode in python 3. Especially useful for implementing __str__ methods in python 2: http://stackoverflow.com/a/1307210/381010""" - if sys.version_info[0] == 2: - # For lists in python2, remove unicode string representation characters. + if IS_PY2: + # For lists and tuples in python2, remove unicode string representation characters. # i.e. ensure lists are printed as ['a', 'b'] and not [u'a', u'b'] if type(obj) in [list]: return [sstr(item) for item in obj] # pragma: no cover # noqa + elif type(obj) in [tuple]: + return tuple(sstr(item) for item in obj) # pragma: no cover # noqa return unicode(obj).encode(DEFAULT_ENCODING) # pragma: no cover # noqa else: |