1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# vim:fileencoding=utf-8:noet
'''Encodings support
This is the only module from which functions obtaining encoding should be
exported. Note: you should always care about errors= argument since it is not
guaranteed that encoding returned by some function can encode/decode given
string.
All functions in this module must always return a valid encoding. Most of them
are not thread-safe.
'''
from __future__ import (unicode_literals, division, absolute_import, print_function)
import sys
import locale
def get_preferred_file_name_encoding():
'''Get preferred file name encoding
'''
return (
sys.getfilesystemencoding()
or locale.getpreferredencoding()
or 'utf-8'
)
def get_preferred_file_contents_encoding():
'''Get encoding preferred for file contents
'''
return (
locale.getpreferredencoding()
or 'utf-8'
)
def get_preferred_output_encoding():
'''Get encoding that should be used for printing strings
.. warning::
Falls back to ASCII, so that output is most likely to be displayed
correctly.
'''
if hasattr(locale, 'LC_MESSAGES'):
return (
locale.getlocale(locale.LC_MESSAGES)[1]
or locale.getdefaultlocale()[1]
or 'ascii'
)
return (
locale.getdefaultlocale()[1]
or 'ascii'
)
def get_preferred_input_encoding():
'''Get encoding that should be used for reading shell command output
.. warning::
Falls back to latin1 so that function is less likely to throw as decoded
output is primary searched for ASCII values.
'''
if hasattr(locale, 'LC_MESSAGES'):
return (
locale.getlocale(locale.LC_MESSAGES)[1]
or locale.getdefaultlocale()[1]
or 'latin1'
)
return (
locale.getdefaultlocale()[1]
or 'latin1'
)
def get_preferred_arguments_encoding():
'''Get encoding that should be used for command-line arguments
.. warning::
Falls back to latin1 so that function is less likely to throw as
non-ASCII command-line arguments most likely contain non-ASCII
filenames and screwing them up due to unidentified locale is not much of
a problem.
'''
return (
locale.getdefaultlocale()[1]
or 'latin1'
)
def get_preferred_environment_encoding():
'''Get encoding that should be used for decoding environment variables
'''
return (
locale.getpreferredencoding()
or 'utf-8'
)
def get_unicode_writer(stream=sys.stdout, encoding=None, errors='replace'):
'''Get function which will write unicode string to the given stream
Writing is done using encoding returned by
:py:func:`get_preferred_output_encoding`.
:param file stream:
Stream to write to. Default value is :py:attr:`sys.stdout`.
:param str encoding:
Determines which encoding to use. If this argument is specified then
:py:func:`get_preferred_output_encoding` is not used.
:param str errors:
Determines what to do with characters which cannot be encoded. See
``errors`` argument of :py:func:`codecs.encode`.
:return: Callable which writes unicode string to the given stream using
the preferred output encoding.
'''
encoding = encoding or get_preferred_output_encoding()
if sys.version_info < (3,) or not hasattr(stream, 'buffer'):
return lambda s: stream.write(s.encode(encoding, errors))
else:
return lambda s: stream.buffer.write(s.encode(encoding, errors))
|