summaryrefslogtreecommitdiffstats
path: root/src/arrow/python/benchmarks/io.py
blob: 01a9acb647958c4955d12d6ff33622d86826f796 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import time
import pyarrow as pa


class HighLatencyReader(object):
    """File-like reader that sleeps before delegating each read.

    Wraps ``raw`` and pauses for ``latency`` seconds ahead of every
    ``read`` call; ``close`` and ``closed`` are forwarded untouched.
    """

    def __init__(self, raw, latency):
        # latency: value handed to time.sleep() before each read.
        self.latency = latency
        # raw: wrapped object; must provide read(), close() and closed.
        self.raw = raw

    @property
    def closed(self):
        """Report the ``closed`` state of the wrapped object."""
        wrapped = self.raw
        return wrapped.closed

    def close(self):
        """Close the wrapped object."""
        wrapped = self.raw
        wrapped.close()

    def read(self, nbytes=None):
        """Sleep for ``self.latency``, then return ``raw.read(nbytes)``.

        ``nbytes`` is forwarded unchanged, including the default ``None``.
        """
        time.sleep(self.latency)
        data = self.raw.read(nbytes)
        return data


class HighLatencyWriter(object):
    """File-like writer that sleeps before delegating each write.

    Wraps ``raw`` and pauses for ``latency`` seconds ahead of every
    ``write`` call; ``close`` and ``closed`` are forwarded untouched.
    """

    def __init__(self, raw, latency):
        # latency: value handed to time.sleep() before each write.
        self.latency = latency
        # raw: wrapped object; must provide write(), close() and closed.
        self.raw = raw

    @property
    def closed(self):
        """Report the ``closed`` state of the wrapped object."""
        wrapped = self.raw
        return wrapped.closed

    def close(self):
        """Close the wrapped object."""
        wrapped = self.raw
        wrapped.close()

    def write(self, data):
        """Sleep for ``self.latency``, then pass ``data`` to ``raw.write``.

        The result of the underlying write is discarded; this method
        always returns ``None``.
        """
        time.sleep(self.latency)
        sink = self.raw
        sink.write(data)


class BufferedIOHighLatency(object):
    """Benchmark buffered pyarrow streams over artificially slow files.

    Each timed method moves ``total_size`` bytes through a pyarrow stream
    opened with ``buffer_size`` on top of a HighLatencyWriter or
    HighLatencyReader, issuing calls of ``increment`` bytes each.
    """

    # Size in bytes of each individual write()/read() call.
    increment = 1024
    # Total number of bytes moved by one benchmark run.
    total_size = 16 * (1 << 20)  # 16 MB
    # buffer_size handed to pa.output_stream / pa.input_stream.
    buffer_size = 1 << 20  # 1 MB
    # NOTE(review): not read by the methods below, which receive the
    # latency as an argument instead.
    latency = 0.1  # 100ms

    # Presumably consumed by the benchmark runner to parametrize the
    # ``latency`` argument of the time_* methods -- confirm.
    param_names = ('latency',)
    params = [0, 0.01, 0.1]

    def time_buffered_writes(self, latency):
        """Write ``total_size`` bytes in ``increment``-sized chunks.

        The chunks go to a buffered output stream wrapping a
        HighLatencyWriter (sleeping ``latency`` seconds per write) over
        an in-memory pyarrow BufferOutputStream; the stream is flushed
        once at the end.
        """
        chunk = b'x' * self.increment
        slow_sink = HighLatencyWriter(pa.BufferOutputStream(), latency)
        stream = pa.output_stream(slow_sink, buffer_size=self.buffer_size)

        # Count down instead of up: same number of iterations.
        remaining = self.total_size
        while remaining > 0:
            stream.write(chunk)
            remaining -= self.increment
        stream.flush()

    def time_buffered_reads(self, latency):
        """Read ``total_size`` bytes in ``increment``-sized requests.

        The requests go to a buffered input stream wrapping a
        HighLatencyReader (sleeping ``latency`` seconds per read) over
        an in-memory buffer of ``total_size`` bytes.
        """
        payload = pa.py_buffer(b'x' * self.total_size)
        slow_source = HighLatencyReader(pa.input_stream(payload), latency)
        stream = pa.input_stream(slow_source,
                                 buffer_size=self.buffer_size)

        # Count down instead of up: same number of iterations.
        remaining = self.total_size
        while remaining > 0:
            stream.read(self.increment)
            remaining -= self.increment