summaryrefslogtreecommitdiffstats
path: root/tests/tests_pandas.py
blob: 334a97cbb66a2cccd023320d2a4982bb8a5c40dd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
from tqdm import tqdm

from .tests_tqdm import StringIO, closing, importorskip, mark, skip

pytestmark = mark.slow

random = importorskip('numpy.random')
rand = random.rand
randint = random.randint
pd = importorskip('pandas')


def test_pandas_setup():
    """Test tqdm.pandas()"""
    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True, total=123)
        series = pd.Series(randint(0, 50, (100,)))
        series.progress_apply(lambda x: x + 10)
        res = our_file.getvalue()
        assert '100/123' in res


def test_pandas_rolling_expanding():
    """Test pandas.(Series|DataFrame).(rolling|expanding)"""
    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)

        series = pd.Series(randint(0, 50, (123,)))
        res1 = series.rolling(10).progress_apply(lambda x: 1, raw=True)
        res2 = series.rolling(10).apply(lambda x: 1, raw=True)
        assert res1.equals(res2)

        res3 = series.expanding(10).progress_apply(lambda x: 2, raw=True)
        res4 = series.expanding(10).apply(lambda x: 2, raw=True)
        assert res3.equals(res4)

        expects = ['114it']  # 123-10+1
        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 2:
                our_file.seek(0)
                raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                    exres + " at least twice.", our_file.read()))


def test_pandas_series():
    """Test pandas.Series.progress_apply and .progress_map"""
    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)

        series = pd.Series(randint(0, 50, (123,)))
        res1 = series.progress_apply(lambda x: x + 10)
        res2 = series.apply(lambda x: x + 10)
        assert res1.equals(res2)

        res3 = series.progress_map(lambda x: x + 10)
        res4 = series.map(lambda x: x + 10)
        assert res3.equals(res4)

        expects = ['100%', '123/123']
        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 2:
                our_file.seek(0)
                raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                    exres + " at least twice.", our_file.read()))


def test_pandas_data_frame():
    """Test pandas.DataFrame.progress_apply and .progress_applymap"""
    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        df = pd.DataFrame(randint(0, 50, (100, 200)))

        def task_func(x):
            return x + 1

        # applymap
        res1 = df.progress_applymap(task_func)
        res2 = df.applymap(task_func)
        assert res1.equals(res2)

        # apply unhashable
        res1 = []
        df.progress_apply(res1.extend)
        assert len(res1) == df.size

        # apply
        for axis in [0, 1, 'index', 'columns']:
            res3 = df.progress_apply(task_func, axis=axis)
            res4 = df.apply(task_func, axis=axis)
            assert res3.equals(res4)

        our_file.seek(0)
        if our_file.read().count('100%') < 3:
            our_file.seek(0)
            raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                '100% at least three times', our_file.read()))

        # apply_map, apply axis=0, apply axis=1
        expects = ['20000/20000', '200/200', '100/100']
        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 1:
                our_file.seek(0)
                raise AssertionError("\nExpected:\n{0}\nIn:\n {1}\n".format(
                    exres + " at least once.", our_file.read()))


def test_pandas_groupby_apply():
    """Test pandas.DataFrame.groupby(...).progress_apply"""
    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=False, ascii=True)

        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.groupby(0).progress_apply(lambda x: None)

        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        dfs.groupby(['a']).progress_apply(lambda x: None)

        df2 = df = pd.DataFrame({'a': randint(1, 8, 10000), 'b': rand(10000)})
        res1 = df2.groupby("a").apply(max)
        res2 = df2.groupby("a").progress_apply(max)
        assert res1.equals(res2)

        our_file.seek(0)

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        nexres = '100%|##########|'
        if nexres in our_file.read():
            our_file.seek(0)
            raise AssertionError("\nDid not expect:\n{0}\nIn:{1}\n".format(
                nexres, our_file.read()))

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)

        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        dfs.loc[0] = [2, 1, 1]
        dfs['d'] = 100

        expects = ['500/500', '1/1', '4/4', '2/2']
        dfs.groupby(dfs.index).progress_apply(lambda x: None)
        dfs.groupby('d').progress_apply(lambda x: None)
        dfs.groupby(dfs.columns, axis=1).progress_apply(lambda x: None)
        dfs.groupby([2, 2, 1, 1], axis=1).progress_apply(lambda x: None)

        our_file.seek(0)
        if our_file.read().count('100%') < 4:
            our_file.seek(0)
            raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                '100% at least four times', our_file.read()))

        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 1:
                our_file.seek(0)
                raise AssertionError("\nExpected:\n{0}\nIn:\n {1}\n".format(
                    exres + " at least once.", our_file.read()))


def test_pandas_leave():
    """Test pandas with `leave=True`"""
    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        df.groupby(0).progress_apply(lambda x: None)

        our_file.seek(0)

        exres = '100%|##########| 100/100'
        if exres not in our_file.read():
            our_file.seek(0)
            raise AssertionError("\nExpected:\n{0}\nIn:{1}\n".format(
                exres, our_file.read()))


def test_pandas_apply_args_deprecation():
    """Test warning info in
    `pandas.Dataframe(Series).progress_apply(func, *args)`"""
    try:
        from tqdm import tqdm_pandas
    except ImportError as err:
        skip(str(err))

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.progress_apply(lambda x: None, 1)  # 1 shall cause a warning
        # Check deprecation message
        res = our_file.getvalue()
        assert all(i in res for i in (
            "TqdmDeprecationWarning", "not supported",
            "keyword arguments instead"))


def test_pandas_deprecation():
    """Test bar object instance as argument deprecation"""
    try:
        from tqdm import tqdm_pandas
    except ImportError as err:
        skip(str(err))

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.groupby(0).progress_apply(lambda x: None)
        # Check deprecation message
        assert "TqdmDeprecationWarning" in our_file.getvalue()
        assert "instead of `tqdm_pandas(tqdm(...))`" in our_file.getvalue()

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm, file=our_file, leave=False, ascii=True, ncols=20)
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.groupby(0).progress_apply(lambda x: None)
        # Check deprecation message
        assert "TqdmDeprecationWarning" in our_file.getvalue()
        assert "instead of `tqdm_pandas(tqdm, ...)`" in our_file.getvalue()