summaryrefslogtreecommitdiffstats
path: root/src/boost/libs/parameter/test/efficiency.cpp
blob: ddfb62a17b26f83b54a15621c6ef330c38745235 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2005.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#include <boost/parameter/name.hpp>
#include <boost/config/workaround.hpp>
#include <boost/timer.hpp>
#include <iostream>

namespace test {

    //
    // This test measures the abstraction overhead of using the named
    // parameter interface.  Some actual test results have been recorded
    // in timings.txt in this source file's directory, or
    // http://www.boost.org/libs/parameter/test/timings.txt.
    //
    // Caveats:
    //
    //   1. This test penalizes the named parameter library slightly, by
    //      passing two arguments through the named interface, while
    //      only passing one through the plain C++ interface.
    //
    //   2. This test does not measure the case where an ArgumentPack is
    //      so large that it doesn't fit in the L1 cache.
    //
    //   3. Although we've tried to make this test as general as possible,
    //      we are targeting it at a specific application.  Where that
    //      affects design decisions, we've noted it below in ***...***.
    //
    //   4. The first time you run this program, the time may not be
    //      representative because of disk and memory cache effects, so
    //      always run it multiple times and ignore the first
    //      measurement.  This approach will also allow you to estimate
    //      the statistical error of your test by observing the
    //      variation in the valid times.
    //
    //   5. Try to run this program on a machine that's otherwise idle,
    //      or other processes and even device hardware interrupts may
    //      interfere by causing caches to be flushed.

    // Accumulator function object with plain C++ interface 
    template <typename T>
    struct plain_weight_running_total
    {
        plain_weight_running_total()
#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
          : sum(T())
#else
          : sum()
#endif
        {
        }

        void operator()(T w)
        {
            this->sum += w;
        }

        T sum;
    };

    BOOST_PARAMETER_NAME(weight)
    BOOST_PARAMETER_NAME(value)

    // Accumulator function object with named parameter interface
    template <typename T>
    struct named_param_weight_running_total
    {
        named_param_weight_running_total()
#if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
          : sum(T())
#else
          : sum()
#endif
        {
        }

        template <typename ArgumentPack>
        void operator()(ArgumentPack const& variates)
        {
            this->sum += variates[test::_weight];
        }

        T sum;
    };

    // This value is required to ensure that a smart compiler's dead code
    // elimination doesn't optimize away anything we're testing.  We'll use it
    // to compute the return code of the executable to make sure it's needed.
    double live_code;

    // Call objects of the given Accumulator type repeatedly
    // with x an argument.
    template <typename Accumulator, typename Arg>
    void hammer(Arg const& x, long const repeats)
    {
        // Strategy: because the sum in an accumulator after each call
        // depends on the previous value of the sum, the CPU's pipeline
        // might be stalled while waiting for the previous addition to
        // complete.  Therefore, we allocate an array of accumulators,
        // and update them in sequence, so that there's no dependency
        // between adjacent addition operations.
        //
        // Additionally, if there were only one accumulator, the compiler or
        // CPU might decide to update the value in a register rather than
        // writing it back to memory.  We want each operation to at least
        // update the L1 cache.  *** Note: This concern is specific to the
        // particular application at which we're targeting the test. ***

        // This has to be at least as large as the number of simultaneous
        // accumulations that can be executing in the compiler pipeline.  A
        // safe number here is larger than the machine's maximum pipeline
        // depth.  If you want to test the L2 or L3 cache, or main memory,
        // you can increase the size of this array.  1024 is an upper limit
        // on the pipeline depth of current vector machines.
        std::size_t const number_of_accumulators = 1024;

        Accumulator a[number_of_accumulators];

        for (long iteration = 0; iteration < repeats; ++iteration)
        {
            for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
            {
                (*ap)(x);
            }
        }

        // Accumulate all the partial sums to avoid dead code elimination.
        for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
        {
            test::live_code += ap->sum;
        }
    }

    // Measure the time required to hammer accumulators of the given
    // type with the argument x.
    template <typename Accumulator, typename T>
    double measure(T const& x, long const repeats)
    {
        // Hammer accumulators a couple of times to ensure the instruction
        // cache is full of our test code, and that we don't measure the cost
        // of a page fault for accessing the data page containing the memory
        // where the accumulators will be allocated.
        test::hammer<Accumulator>(x, repeats);
        test::hammer<Accumulator>(x, repeats);

        // Now start a timer.
        boost::timer time;
        test::hammer<Accumulator>(x, repeats);  // This time, we'll measure.
        return time.elapsed();
    }
}

int main()
{
    // First decide how many repetitions to measure.
    long repeats = 100;
    double measured = 0;

    while (measured < 1.0 && repeats <= 10000000)
    {
        repeats *= 10;

        boost::timer time;

        test::hammer<test::plain_weight_running_total<double> >(.1, repeats);
        test::hammer<test::named_param_weight_running_total<double> >(
            (test::_weight = .1, test::_value = .2), repeats
        );

        measured = time.elapsed();
    }
    
    std::cout
        << "plain time:           "
        << test::measure<test::plain_weight_running_total<double> >(
            .1, repeats
        )
        << std::endl;

    std::cout
        << "named parameter time: "
        << test::measure<test::named_param_weight_running_total<double> >(
            (test::_weight = .1, test::_value = .2), repeats
        )
        << std::endl;

    // This is ultimately responsible for preventing all the test code
    // from being optimized away.  Change this to return 0 and you
    // unplug the whole test's life support system.
    return test::live_code < 0.;
}