From 5f208e04c159791e668031a7fa83f98724ec8d24 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 3 Nov 2020 07:07:45 +0100 Subject: Adding upstream version 0.14.0. Signed-off-by: Daniel Baumann --- gitlint/utils.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'gitlint/utils.py') diff --git a/gitlint/utils.py b/gitlint/utils.py index c418347..89015e7 100644 --- a/gitlint/utils.py +++ b/gitlint/utils.py @@ -1,4 +1,5 @@ # pylint: disable=bad-option-value,unidiomatic-typecheck,undefined-variable,no-else-return +import codecs import platform import sys import os @@ -23,6 +24,16 @@ def platform_is_windows(): PLATFORM_IS_WINDOWS = platform_is_windows() +######################################################################################################################## +# IS_PY2 + + +def is_py2(): + return sys.version_info[0] == 2 + + +IS_PY2 = is_py2() + ######################################################################################################################## # USE_SH_LIB # Determine whether to use the `sh` library @@ -46,13 +57,14 @@ USE_SH_LIB = use_sh_library() def getpreferredencoding(): """ Modified version of local.getpreferredencoding() that takes into account LC_ALL, LC_CTYPE, LANG env vars on windows and falls back to UTF-8. """ - default_encoding = locale.getpreferredencoding() or "UTF-8" + fallback_encoding = "UTF-8" + default_encoding = locale.getpreferredencoding() or fallback_encoding # On Windows, we mimic git/linux by trying to read the LC_ALL, LC_CTYPE, LANG env vars manually # (on Linux/MacOS the `getpreferredencoding()` call will take care of this). # We fallback to UTF-8 if PLATFORM_IS_WINDOWS: - default_encoding = "UTF-8" + default_encoding = fallback_encoding for env_var in ["LC_ALL", "LC_CTYPE", "LANG"]: encoding = os.environ.get(env_var, False) if encoding: @@ -65,6 +77,15 @@ def getpreferredencoding(): default_encoding = encoding break + # We've determined what encoding the user *wants*, let's now check if it's actually a valid encoding on the + # system. If not, fallback to UTF-8. + # This scenario is fairly common on Windows where git sets LC_CTYPE=C when invoking the commit-msg hook, which + # is not a valid encoding in Python on Windows. + try: + codecs.lookup(default_encoding) + except LookupError: + default_encoding = fallback_encoding + return default_encoding @@ -76,7 +97,7 @@ DEFAULT_ENCODING = getpreferredencoding() def ustr(obj): """ Python 2 and 3 utility method that converts an obj to unicode in python 2 and to a str object in python 3""" - if sys.version_info[0] == 2: + if IS_PY2: # If we are getting a string, then do an explicit decode # else, just call the unicode method of the object if type(obj) in [str, basestring]: # pragma: no cover # noqa @@ -94,11 +115,13 @@ def sstr(obj): """ Python 2 and 3 utility method that converts an obj to a DEFAULT_ENCODING encoded string in python 2 and to unicode in python 3. Especially useful for implementing __str__ methods in python 2: http://stackoverflow.com/a/1307210/381010""" - if sys.version_info[0] == 2: - # For lists in python2, remove unicode string representation characters. + if IS_PY2: + # For lists and tuples in python2, remove unicode string representation characters. # i.e. ensure lists are printed as ['a', 'b'] and not [u'a', u'b'] if type(obj) in [list]: return [sstr(item) for item in obj] # pragma: no cover # noqa + elif type(obj) in [tuple]: + return tuple(sstr(item) for item in obj) # pragma: no cover # noqa return unicode(obj).encode(DEFAULT_ENCODING) # pragma: no cover # noqa else: -- cgit v1.2.3