# cli_helpers/tabular_output/preprocessors.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353

# -*- coding: utf-8 -*-
"""These preprocessor functions are used to process data prior to output."""

import string

from cli_helpers import utils
from cli_helpers.compat import text_type, int_types, float_types, HAS_PYGMENTS, Token


def truncate_string(
    data, headers, max_field_width=None, skip_multiline_string=True, **_
):
    """Shorten over-long field values and headers before tabulating.

    Tabulating very long values is slow and visually useless, so every
    cell and every header is passed through
    :func:`cli_helpers.utils.truncate_string` together with
    *max_field_width* and *skip_multiline_string*. The rows are produced
    lazily; the headers are processed immediately.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param int max_field_width: Width to truncate field for display
    :param bool skip_multiline_string: Forwarded unchanged to
        :func:`cli_helpers.utils.truncate_string`.
    :return: The processed data and headers.
    :rtype: tuple
    """

    def _shorten(value):
        return utils.truncate_string(value, max_field_width, skip_multiline_string)

    shortened_rows = ([_shorten(cell) for cell in record] for record in data)
    shortened_headers = [_shorten(title) for title in headers]
    return shortened_rows, shortened_headers


def convert_to_string(data, headers, **_):
    """Turn every value in *data* and *headers* into a string.

    Each cell and each header is passed through
    :func:`cli_helpers.utils.to_string`. The rows are converted lazily,
    one at a time, while the headers are converted immediately.

    .. NOTE::
       How binary data that cannot be decoded is rendered is decided by
       :func:`cli_helpers.utils.to_string` (documented so far as a
       hexadecimal representation via :func:`binascii.hexlify`).

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :return: The processed data and headers.
    :rtype: tuple

    """

    def _as_text(value):
        return utils.to_string(value)

    text_rows = (list(map(_as_text, record)) for record in data)
    text_headers = list(map(_as_text, headers))
    return text_rows, text_headers


def override_missing_value(
    data,
    headers,
    style=None,
    missing_value_token=Token.Output.Null,
    missing_value="",
    **_,
):
    """Replace every :data:`None` field in *data* with *missing_value*.

    When a *style* is given and Pygments is available, the replacement is
    additionally passed through :func:`cli_helpers.utils.style_field`
    using *missing_value_token*. Fields that are not :data:`None` and the
    *headers* are returned untouched.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param style: Style for missing_value.
    :param missing_value_token: The Pygments token used for missing data.
    :param missing_value: The default value to use for missing data.
    :return: The processed data and headers.
    :rtype: tuple

    """

    def _substitute(field):
        if field is not None:
            return field
        if style and HAS_PYGMENTS:
            return utils.style_field(missing_value_token, missing_value, style)
        return missing_value

    def fields():
        # Generator function: *data* is not touched until iteration starts.
        for record in data:
            yield [_substitute(cell) for cell in record]

    return (fields(), headers)


def override_tab_value(data, headers, new_value="    ", **_):
    """Replace tab characters in text fields of *data* with *new_value*.

    Only fields that are text strings are changed; every other field and
    the *headers* are passed through as they are. Rows are processed
    lazily.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param new_value: The new value to use for tab.
    :return: The processed data and headers.
    :rtype: tuple

    """

    def _expand_tabs(cell):
        if not isinstance(cell, text_type):
            return cell
        return cell.replace("\t", new_value)

    expanded_rows = ([_expand_tabs(cell) for cell in record] for record in data)
    return expanded_rows, headers


def escape_newlines(data, headers, **_):
    r"""Escape newline characters (\n -> \\n, \r -> \\r).

    Carriage returns and line feeds inside text fields are replaced by
    their two-character backslash escapes. Fields that are not text
    strings and the *headers* are passed through as they are. Rows are
    processed lazily.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :return: The processed data and headers.
    :rtype: tuple

    """

    def _escape(cell):
        if not isinstance(cell, text_type):
            return cell
        without_cr = cell.replace("\r", r"\r")
        return without_cr.replace("\n", r"\n")

    escaped_rows = ([_escape(cell) for cell in record] for record in data)
    return escaped_rows, headers


def bytes_to_string(data, headers, **_):
    """Convert the bytes found in *data* and *headers* to strings.

    Each cell and each header is passed through
    :func:`cli_helpers.utils.bytes_to_string`. The rows are converted
    lazily, one at a time, while the headers are converted immediately.

    .. NOTE::
       How binary data that cannot be decoded is rendered is decided by
       :func:`cli_helpers.utils.bytes_to_string` (documented so far as a
       hexadecimal representation via :func:`binascii.hexlify`).

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :return: The processed data and headers.
    :rtype: tuple

    """

    def _decode(value):
        return utils.bytes_to_string(value)

    decoded_rows = (list(map(_decode, record)) for record in data)
    decoded_headers = list(map(_decode, headers))
    return decoded_rows, decoded_headers


def align_decimals(data, headers, column_types=(), **_):
    """Align numbers in *data* on their decimal points.

    Whitespace padding is added before a number so that all numbers in a
    column are aligned.

    Outputting data before aligning the decimals::

        1
        2.1
        10.59

    Outputting data after aligning the decimals::

         1
         2.1
        10.59

    Only fields whose column type is :class:`float` and whose own type is
    one of the supported float types are padded (and thereby converted to
    text). A column that has no entry in *column_types* -- including every
    column when *column_types* is left at its empty default -- is passed
    through unchanged instead of raising :exc:`IndexError`.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param iterable column_types: The columns' type objects (e.g. int or float).
    :return: The processed data and headers.
    :rtype: tuple

    """
    # The rows are walked twice (measure, then pad), so a one-shot iterable
    # has to be materialized first.
    rows = list(data)
    types = () if column_types is None else tuple(column_types)

    def _is_float_field(index, value):
        # The bounds check comes first so that a missing column type simply
        # means "do not align this column".
        return (
            index < len(types)
            and types[index] is float
            and type(value) in float_types
        )

    # Widest integer part seen per column index. A dict (rather than a list
    # sized from the headers) stays valid for rows wider than the headers.
    pointpos = {}
    for row in rows:
        for i, v in enumerate(row):
            if _is_float_field(i, v):
                pointpos[i] = max(utils.intlen(text_type(v)), pointpos.get(i, 0))

    def results(rows):
        for row in rows:
            result = []
            for i, v in enumerate(row):
                if _is_float_field(i, v):
                    text = text_type(v)
                    padding = (pointpos[i] - utils.intlen(text)) * " "
                    result.append(padding + text)
                else:
                    result.append(v)
            yield result

    return results(rows), headers


def quote_whitespaces(data, headers, quotestyle="'", **_):
    """Quote leading/trailing whitespace in *data*.

    When outputting data with leading or trailing whitespace, it can be
    useful to put quotation marks around the value so the whitespace is
    more apparent. If one value in a column needs to be quoted, then all
    values in that column are quoted to keep things consistent.

    All of *data* is read up front to decide which columns need quoting.
    Every field is rendered into text in the result, quoted or not.

    .. NOTE::
       :data:`string.whitespace` is used to determine which characters are
       whitespace.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param str quotestyle: The quotation mark to use (defaults to ``'``).
    :return: The processed data and headers.
    :rtype: tuple

    """
    blanks = tuple(string.whitespace)
    needs_quotes = [False] * len(headers)
    rows = list(data)

    # First pass: flag each column that holds at least one padded value.
    for record in rows:
        for col, cell in enumerate(record):
            text = text_type(cell)
            if text.startswith(blanks) or text.endswith(blanks):
                needs_quotes[col] = True

    # The flags are final now, so the per-column quote mark can be fixed.
    marks = [quotestyle if flagged else "" for flagged in needs_quotes]
    template = "{quotestyle}{value}{quotestyle}"

    quoted_rows = (
        [
            template.format(quotestyle=marks[col], value=cell)
            for col, cell in enumerate(record)
        ]
        for record in rows
    )
    return quoted_rows, headers


def style_output(
    data,
    headers,
    style=None,
    header_token=Token.Output.Header,
    odd_row_token=Token.Output.OddRow,
    even_row_token=Token.Output.EvenRow,
    **_,
):
    """Apply a Pygments style (e.g. bold, italic, and colors) to the output.

    Headers are styled only when *style* defines something for
    *header_token*. Rows are styled only when *style* defines something
    for *odd_row_token* or *even_row_token*; rows are counted from one, so
    the first row uses *odd_row_token*. Without a *style*, or without
    Pygments, *data* and *headers* are returned unstyled.

    .. NOTE::
        This requires the `Pygments <http://pygments.org/>`_ library to
        be installed. You can install it with CLI Helpers as an extra::

            $ pip install cli_helpers[styles]

    Example usage::

        from cli_helpers.tabular_output.preprocessors import style_output
        from pygments.style import Style
        from pygments.token import Token

        class YourStyle(Style):
            default_style = ""
            styles = {
                Token.Output.Header: 'bold ansibrightred',
                Token.Output.OddRow: 'bg:#eee #111',
                Token.Output.EvenRow: '#0f0'
            }

        headers = ('First Name', 'Last Name')
        data = [['Fred', 'Roberts'], ['George', 'Smith']]

        data, headers = style_output(data, headers, style=YourStyle)

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param str/pygments.style.Style style: A Pygments style. You can `create
        your own styles <https://pygments.org/docs/styles#creating-own-styles>`_.
    :param str header_token: The token type to be used for the headers.
    :param str odd_row_token: The token type to be used for odd rows.
    :param str even_row_token: The token type to be used for even rows.
    :return: The styled data and headers.
    :rtype: tuple

    """
    from cli_helpers.utils import filter_style_table

    relevant_styles = filter_style_table(
        style, header_token, odd_row_token, even_row_token
    )

    # Nothing to style without a style or without Pygments.
    if not (style and HAS_PYGMENTS):
        return iter(data), headers

    if relevant_styles.get(header_token):
        headers = [utils.style_field(header_token, title, style) for title in headers]

    rows_have_style = relevant_styles.get(odd_row_token) or relevant_styles.get(
        even_row_token
    )
    if rows_have_style:

        def _row_token(position):
            return odd_row_token if position % 2 else even_row_token

        data = (
            [utils.style_field(_row_token(position), cell, style) for cell in record]
            for position, record in enumerate(data, 1)
        )

    return iter(data), headers


def format_numbers(
    data, headers, column_types=(), integer_format=None, float_format=None, **_
):
    """Format numbers according to a format specification.

    Numbers are rendered with the built-in :func:`format`; see the
    :ref:`python:formatspec` for more information about the format
    strings. If neither format is given, or *column_types* is empty, the
    data is returned unformatted.

    .. NOTE::
       A field is formatted only when its column type is :class:`int` (or
       :class:`float`), the matching format string is set, and the field's
       own type is one of the integer (or float) types supported by
       :mod:`cli_helpers.compat`. Any other field, e.g. :data:`None`, is
       passed through unchanged.

    :param iterable data: An :term:`iterable` (e.g. list) of rows.
    :param iterable headers: The column headers.
    :param iterable column_types: The columns' type objects (e.g. int or float).
    :param str integer_format: The format string to use for integer columns.
    :param str float_format: The format string to use for float columns.
    :return: The processed data and headers.
    :rtype: tuple

    """
    no_format_requested = integer_format is None and float_format is None
    if no_format_requested or not column_types:
        return iter(data), headers

    def _render(value, kind):
        if integer_format and kind is int and type(value) in int_types:
            return format(value, integer_format)
        if float_format and kind is float and type(value) in float_types:
            return format(value, float_format)
        return value

    formatted_rows = (
        [_render(cell, column_types[col]) for col, cell in enumerate(record)]
        for record in data
    )
    return formatted_rows, headers