# vim:fileencoding=utf-8:noet

'''Encodings support

This is the only module from which functions obtaining encoding should be
exported. Note: you should always care about errors= argument since it is not
guaranteed that encoding returned by some function can encode/decode given
string.

All functions in this module must always return a valid encoding. Most of them
are not thread-safe.
'''

from __future__ import (unicode_literals, division, absolute_import, print_function)

import codecs
import io
import locale
import sys


def _is_known_encoding(name):
	'''Check whether Python has a codec registered under the given name

	:param str name:
		Encoding name to check.

	:return:
		True if :py:func:`codecs.lookup` finds a codec for ``name``, False if
		it raises :py:exc:`LookupError`.
	'''
	try:
		codecs.lookup(name)
	except LookupError:
		return False
	return True


def _first_known_encoding(getters, fallback):
	'''Return the first usable encoding produced by the given callables

	Callables are invoked lazily and in order, so a later source is consulted
	only when every earlier one returned nothing or returned a name for which
	no codec exists.

	:param getters:
		Iterable of zero-argument callables returning an encoding name or
		a false value.
	:param str fallback:
		Encoding returned when no callable yields a usable name.

	:return: Encoding name.
	'''
	for get_encoding in getters:
		name = get_encoding()
		if name and _is_known_encoding(name):
			return name
	return fallback


def _get_default_locale_encoding():
	'''Get encoding from :py:func:`locale.getdefaultlocale` without raising

	:return:
		Encoding name, or None if it is not set, the locale cannot be parsed
		(``ValueError``) or the function is not available.
	'''
	try:
		return locale.getdefaultlocale()[1]
	except (AttributeError, ValueError):
		# ValueError: locale name from the environment could not be parsed.
		# AttributeError: getdefaultlocale is deprecated in the standard
		# library documentation; tolerate interpreters that do not provide it.
		return None


def _get_messages_locale_encoding():
	'''Get encoding of the ``LC_MESSAGES`` locale category without raising

	:return:
		Encoding name, or None if the platform has no ``LC_MESSAGES``, the
		encoding is not set or the locale cannot be parsed (``ValueError``).
	'''
	if not hasattr(locale, 'LC_MESSAGES'):
		return None
	try:
		return locale.getlocale(locale.LC_MESSAGES)[1]
	except ValueError:
		return None


def get_preferred_file_name_encoding():
	'''Get preferred file name encoding

	Tries :py:func:`sys.getfilesystemencoding`, then
	:py:func:`locale.getpreferredencoding`, then falls back to UTF-8.
	'''
	return _first_known_encoding(
		(sys.getfilesystemencoding, locale.getpreferredencoding),
		'utf-8',
	)


def get_preferred_file_contents_encoding():
	'''Get encoding preferred for file contents

	Uses :py:func:`locale.getpreferredencoding`, falling back to UTF-8.
	'''
	return _first_known_encoding((locale.getpreferredencoding,), 'utf-8')


def get_preferred_output_encoding():
	'''Get encoding that should be used for printing strings

	.. warning::
		Falls back to ASCII, so that output is most likely to be displayed
		correctly.
	'''
	return _first_known_encoding(
		(_get_messages_locale_encoding, _get_default_locale_encoding),
		'ascii',
	)


def get_preferred_input_encoding():
	'''Get encoding that should be used for reading shell command output

	.. warning::
		Falls back to latin1 so that function is less likely to throw as
		decoded output is primarily searched for ASCII values.
	'''
	return _first_known_encoding(
		(_get_messages_locale_encoding, _get_default_locale_encoding),
		'latin1',
	)


def get_preferred_arguments_encoding():
	'''Get encoding that should be used for command-line arguments

	.. warning::
		Falls back to latin1 so that function is less likely to throw as
		non-ASCII command-line arguments most likely contain non-ASCII
		filenames and screwing them up due to unidentified locale is not much
		of a problem.
	'''
	return _first_known_encoding((_get_default_locale_encoding,), 'latin1')


def get_preferred_environment_encoding():
	'''Get encoding that should be used for decoding environment variables

	Uses :py:func:`locale.getpreferredencoding`, falling back to UTF-8.
	'''
	return _first_known_encoding((locale.getpreferredencoding,), 'utf-8')


def get_unicode_writer(stream=sys.stdout, encoding=None, errors='replace'):
	'''Get function which will write unicode string to the given stream

	Writing is done using encoding returned by
	:py:func:`get_preferred_output_encoding`.

	:param file stream:
		Stream to write to. Default value is :py:attr:`sys.stdout`.
	:param str encoding:
		Determines which encoding to use. If this argument is specified then
		:py:func:`get_preferred_output_encoding` is not used.
	:param str errors:
		Determines what to do with characters which cannot be encoded. See
		``errors`` argument of :py:func:`codecs.encode`.

	:return:
		Callable which writes unicode string to the given stream using
		the preferred output encoding. It returns whatever the underlying
		``write`` call returns.
	'''
	encoding = encoding or get_preferred_output_encoding()
	if sys.version_info >= (3,) and hasattr(stream, 'buffer'):
		# Python 3 text stream with a binary layer: bypass the stream's own
		# encoding and write the encoded bytes directly.
		return lambda s: stream.buffer.write(s.encode(encoding, errors))
	if isinstance(stream, io.TextIOBase):
		# Text stream that only accepts unicode (e.g. io.StringIO): writing
		# bytes would raise TypeError, so round-trip through the encoding to
		# apply ``errors`` and hand the stream text.
		return lambda s: stream.write(
			s.encode(encoding, errors).decode(encoding, errors)
		)
	# Byte-oriented stream (Python 2 file objects, io.BytesIO, …).
	return lambda s: stream.write(s.encode(encoding, errors))