diff options
Diffstat (limited to 'src/boost/libs/fusion/example/performance')
9 files changed, 1619 insertions, 0 deletions
diff --git a/src/boost/libs/fusion/example/performance/Jamfile b/src/boost/libs/fusion/example/performance/Jamfile new file mode 100644 index 00000000..3b8c8ffc --- /dev/null +++ b/src/boost/libs/fusion/example/performance/Jamfile @@ -0,0 +1,20 @@ +#============================================================================== +# Copyright (c) 2003-2006 Joel de Guzman +# Copyright (c) 2006 Dan Marsden +# +# Use, modification and distribution is subject to the Boost Software +# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) +#============================================================================== +project fusion-performance ; + +exe accumulate : accumulate.cpp ; + +exe inner_product : inner_product.cpp ; + +exe inner_product2 : inner_product2.cpp ; + +exe sequence_efficiency : sequence_efficiency.cpp ; + +exe functional : functional.cpp ; + diff --git a/src/boost/libs/fusion/example/performance/accumulate.cpp b/src/boost/libs/fusion/example/performance/accumulate.cpp new file mode 100644 index 00000000..176dc458 --- /dev/null +++ b/src/boost/libs/fusion/example/performance/accumulate.cpp @@ -0,0 +1,357 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2005-2006 Dan Marsden + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ +#include <boost/array.hpp> +#include <boost/timer.hpp> + +#include <boost/fusion/algorithm/iteration/accumulate.hpp> +#include <boost/fusion/algorithm/transformation/transform.hpp> +#include <boost/fusion/container/vector.hpp> +#include <boost/fusion/algorithm/transformation/zip.hpp> +#include <boost/fusion/sequence/intrinsic/at.hpp> +#include <boost/fusion/adapted/array.hpp> + +#include <boost/type_traits/remove_reference.hpp> + +#include <algorithm> +#include <numeric> +#include <functional> +#include <iostream> +#include <cmath> +#include <limits> + +#ifdef _MSC_VER +// inline aggressively +# pragma inline_recursion(on) // turn on inline recursion +# pragma inline_depth(255) // max inline depth +#endif + +int const REPEAT_COUNT = 10; + +double const duration = 0.5; + +namespace +{ + template<int N> + double time_for_std_accumulate(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr; + std::generate(arr.begin(), arr.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::accumulate(arr.begin(), arr.end(), 0); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::accumulate(arr.begin(), arr.end(), 0); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } + + struct poly_add + { + template<typename Sig> + struct result; + + template<typename Lhs, typename 
Rhs> + struct result<poly_add(Lhs,Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs + rhs; + } + }; + + struct poly_mult + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_mult(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs * rhs; + } + }; + + template<int N> + double time_for_fusion_accumulate(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr; + std::generate(arr.begin(), arr.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate(arr, 0, poly_add()); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + std::cout << iter << " iterations" << std::endl; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate(arr, 0, poly_add()); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + std::cout << "."; + std::cout.flush(); + } + std::cout << i << std::endl; + return result / iter; + } + +#if 0 + template<int N> + double time_for_std_inner_product(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + 
for(counter = 0; counter < iter; ++counter) + { + i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } + + template<int N> + double time_for_fusion_inner_product(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add()); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add()); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } + + struct poly_combine + { + template<typename Lhs, typename Rhs> + struct result + { + typedef Lhs type; + }; + + template<typename Lhs, typename Rhs> + typename result<Lhs,Rhs>::type + operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs + 
boost::fusion::at_c<0>(rhs) * boost::fusion::at_c<1>(rhs); + } + }; + + template<int N> + double time_for_fusion_inner_product2(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::zip(arr1, arr2), 0, poly_combine()); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + std::cout << iter << " iterations" << std::endl; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::zip(arr1, arr2), 0, poly_combine()); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } +#endif +} + +int main() +{ + int total = 0; + int res = 0; // zero-initialised: the timing helpers add into it, so it must not start indeterminate + std::cout << "short accumulate std test " << time_for_std_accumulate<8>(res) << std::endl; + total += res; + std::cout << "short accumulate fusion test " << time_for_fusion_accumulate<8>(res) << std::endl; + total += res; + + std::cout << "medium accumulate std test " << time_for_std_accumulate<64>(res) << std::endl; + total += res; + std::cout << "medium accumulate fusion test " << time_for_fusion_accumulate<64>(res) << std::endl; + total += res; + + std::cout << "long accumulate std test " << time_for_std_accumulate<128>(res) << std::endl; + total += res; + std::cout << "long accumulate fusion test " << time_for_fusion_accumulate<128>(res) << std::endl; + total += res; + +#if 0 + std::cout << "short inner_product std 
test " << time_for_std_inner_product<8>(res) << std::endl; + total += res; + std::cout << "short inner_product fusion test " << time_for_fusion_inner_product<8>(res) << std::endl; + total += res; + std::cout << "short inner_product fusion 2 test " << time_for_fusion_inner_product2<8>(res) << std::endl; + total += res; + + std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl; + total += res; + std::cout << "medium inner_product fusion test " << time_for_fusion_inner_product<64>(res) << std::endl; + total += res; + std::cout << "medium inner_product fusion 2 test " << time_for_fusion_inner_product2<64>(res) << std::endl; + total += res; + + + std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl; + total += res; + std::cout << "long inner_product fusion test " << time_for_fusion_inner_product<128>(res) << std::endl; + total += res; + std::cout << "long inner_product fusion 2 test " << time_for_fusion_inner_product2<128>(res) << std::endl; + total += res; +#endif + + return total; +} diff --git a/src/boost/libs/fusion/example/performance/functional.cpp b/src/boost/libs/fusion/example/performance/functional.cpp new file mode 100644 index 00000000..9207a90d --- /dev/null +++ b/src/boost/libs/fusion/example/performance/functional.cpp @@ -0,0 +1,307 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2006-2007 Tobias Schwinger + + Use modification and distribution are subject to the Boost Software + License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt). 
+==============================================================================*/ + +#include <boost/fusion/container/list.hpp> +#include <boost/fusion/container/vector.hpp> +#include <boost/fusion/algorithm/iteration/fold.hpp> +#include <boost/fusion/functional/adapter/unfused.hpp> +#include <boost/fusion/functional/adapter/fused_function_object.hpp> + +#include <boost/functional/forward_adapter.hpp> +#include <boost/functional/lightweight_forward_adapter.hpp> + +#include <boost/utility/result_of.hpp> +#include <boost/config.hpp> +#include <boost/timer.hpp> +#include <algorithm> +#include <iostream> + +#ifdef _MSC_VER +// inline aggressively +# pragma inline_recursion(on) // turn on inline recursion +# pragma inline_depth(255) // max inline depth +#endif + +int const REPEAT_COUNT = 3; + +double const duration = 0.125; + + +namespace +{ + struct fused_sum + { + template <typename Seq> + int operator()(Seq const & seq) const + { + int state = 0; + return boost::fusion::fold(seq, state, sum_op()); + } + + typedef int result_type; + + private: + + struct sum_op + { + template <typename T> + int operator()(T const & elem, int value) const + { + return value + sizeof(T) * elem; + } + + template <typename T> + int operator()(T & elem, int value) const + { + elem += sizeof(T); + return value; + } + + typedef int result_type; + }; + }; + + struct unfused_sum + { + inline int operator()() const + { + return 0; + } + template<typename T0> + inline int operator()(T0 const & a0) const + { + return a0; + } + template<typename T0, typename T1> + inline int operator()(T0 const & a0, T1 const & a1) const + { + return a0 + a1; + } + template<typename T0, typename T1, typename T2> + inline int operator()(T0 const & a0, T1 const & a1, T2 a2) const + { + return a0 + a1 + a2; + } + template<typename T0, typename T1, typename T2, typename T3> + inline int operator()(T0 const & a0, T1 const & a1, T2 const & a2, T3 const & a3) const + { + return a0 + a1 + a2 + a3; + } + + typedef int 
result_type; + }; + + template<typename F> + double call_unfused(F const & func, int & j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i += func(); + i += func(0); + i += func(0,1); + i += func(0,1,2); + i += func(0,1,2,3); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = func(); j += i; + i = func(0); j += i; + i = func(0,1); j += i; + i = func(0,1,2); j += i; + i = func(0,1,2,3); j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + return result / iter; + } + + template<typename F> + double call_fused_ra(F const & func, int & j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + do + { + boost::fusion::vector<> v0; + boost::fusion::vector<int> v1(0); + boost::fusion::vector<int,int> v2(0,1); + boost::fusion::vector<int,int,int> v3(0,1,2); + boost::fusion::vector<int,int,int,int> v4(0,1,2,3); + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i += func(v0); + i += func(v1); + i += func(v2); + i += func(v3); + i += func(v4); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + boost::fusion::vector<> v0; + boost::fusion::vector<int> v1(0); + boost::fusion::vector<int,int> v2(0,1); + boost::fusion::vector<int,int,int> v3(0,1,2); + boost::fusion::vector<int,int,int,int> v4(0,1,2,3); + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = func(v0); j += i; + i = func(v1); j += i; + i = func(v2); j 
+= i; + i = func(v3); j += i; + i = func(v4); j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + return result / iter; + } + + template<typename F> + double call_fused(F const & func, int & j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + do + { + boost::fusion::list<> l0; + boost::fusion::list<int> l1(0); + boost::fusion::list<int,int> l2(0,1); + boost::fusion::list<int,int,int> l3(0,1,2); + boost::fusion::list<int,int,int,int> l4(0,1,2,3); + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i += func(l0); + i += func(l1); + i += func(l2); + i += func(l3); + i += func(l4); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + boost::fusion::list<> l0; + boost::fusion::list<int> l1(0); + boost::fusion::list<int,int> l2(0,1); + boost::fusion::list<int,int,int> l3(0,1,2); + boost::fusion::list<int,int,int,int> l4(0,1,2,3); + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = func(l0); j += i; + i = func(l1); j += i; + i = func(l2); j += i; + i = func(l3); j += i; + i = func(l4); j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + return result / iter; + } +} + +int main() +{ + int total = 0; + int res = 0; // zero-initialised: the call_* helpers add into it, so it must not start indeterminate + typedef fused_sum F; + typedef unfused_sum U; + + std::cout << "Compiler: " << BOOST_COMPILER << std::endl; + std::cout << std::endl << "Unfused adapters:" << std::endl; + { + F f; + std::cout << "F /* a fused function object */ " << call_fused_ra(f,res) << std::endl; + total += res; + } + { + F f; + std::cout << "without random access " << call_fused(f,res) << std::endl; + total += res; + } + { + boost::lightweight_forward_adapter< boost::fusion::unfused<F> > f; + std::cout << "lightweight_forward_adapter< unfused<F> > " << call_unfused(f,res) << 
std::endl; + total += res; + } + { + boost::forward_adapter< boost::fusion::unfused<F> > f; + std::cout << "forward_adapter< unfused<F> > " << call_unfused(f,res) << std::endl; + total += res; + } + std::cout << std::endl << "Fused adapters:" << std::endl; + { + unfused_sum f; + std::cout << "U /* an unfused function object */ " << call_unfused(f,res) << std::endl; + total += res; + } + { + boost::fusion::fused_function_object<U> f; + std::cout << "fused_function_object<U> " << call_fused_ra(f,res) << std::endl; + total += res; + } + { + boost::fusion::fused_function_object<U> f; + std::cout << "without random access " << call_fused(f,res) << std::endl; + total += res; + } + { + boost::lightweight_forward_adapter< boost::fusion::unfused< boost::fusion::fused_function_object<U> > > f; + std::cout << "lightweight_forward_adapter< unfused<fused_function_object<U> > >" << call_unfused(f,res) << std::endl; + total += res; + } + { + boost::forward_adapter< boost::fusion::unfused< boost::fusion::fused_function_object<U> > > f; + std::cout << "forward_adapter< unfused<fused_function_object<U> > > " << call_unfused(f,res) << std::endl; + total += res; + } + + return total; +} diff --git a/src/boost/libs/fusion/example/performance/inner_product.cpp b/src/boost/libs/fusion/example/performance/inner_product.cpp new file mode 100644 index 00000000..c9f22c7c --- /dev/null +++ b/src/boost/libs/fusion/example/performance/inner_product.cpp @@ -0,0 +1,184 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2005-2006 Dan Marsden + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ +#include <boost/array.hpp> +#include <boost/timer.hpp> + +#include <boost/fusion/algorithm/iteration/accumulate.hpp> +#include <boost/fusion/algorithm/transformation/transform.hpp> +#include <boost/fusion/container/vector.hpp> +#include <boost/fusion/algorithm/transformation/zip.hpp> +#include <boost/fusion/sequence/intrinsic/at.hpp> +#include <boost/fusion/adapted/array.hpp> +#include <boost/fusion/sequence/intrinsic/at.hpp> + +#include <boost/type_traits/remove_reference.hpp> + +#include <algorithm> +#include <numeric> +#include <functional> +#include <iostream> +#include <cmath> +#include <limits> + +#ifdef _MSC_VER +// inline aggressively +# pragma inline_recursion(on) // turn on inline recursion +# pragma inline_depth(255) // max inline depth +#endif + +int const REPEAT_COUNT = 10; + +double const duration = 0.5; + +namespace +{ + struct poly_add + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_add(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs + rhs; + } + }; + + struct poly_mult + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_mult(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs * rhs; + } + }; + + template<int N> + double time_for_std_inner_product(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + 
std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } + + template<int N> + double time_for_fusion_inner_product(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add()); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::transform(arr1, arr2, poly_mult()), 0, poly_add()); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } +} + +int main() +{ + int total = 0; + int res = 0; // zero-initialised: the timing helpers add into it, so it must not start indeterminate + + std::cout << "short inner_product std test " << time_for_std_inner_product<8>(res) << std::endl; + total += res; + std::cout << "short 
inner_product fusion test " << time_for_fusion_inner_product<8>(res) << std::endl; + total += res; + + std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl; + total += res; + std::cout << "medium inner_product fusion test " << time_for_fusion_inner_product<64>(res) << std::endl; + total += res; + + std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl; + total += res; + std::cout << "long inner_product fusion test " << time_for_fusion_inner_product<128>(res) << std::endl; + total += res; + + return total; +} diff --git a/src/boost/libs/fusion/example/performance/inner_product2.cpp b/src/boost/libs/fusion/example/performance/inner_product2.cpp new file mode 100644 index 00000000..f1d536af --- /dev/null +++ b/src/boost/libs/fusion/example/performance/inner_product2.cpp @@ -0,0 +1,206 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + Copyright (c) 2005-2006 Dan Marsden + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ +#include <boost/array.hpp> +#include <boost/timer.hpp> + +#include <boost/fusion/algorithm/iteration/accumulate.hpp> +#include <boost/fusion/algorithm/transformation/transform.hpp> +#include <boost/fusion/container/vector.hpp> +#include <boost/fusion/algorithm/transformation/zip.hpp> +#include <boost/fusion/sequence/intrinsic/at.hpp> +#include <boost/fusion/adapted/array.hpp> +#include <boost/fusion/sequence/intrinsic/at.hpp> + +#include <boost/type_traits/remove_reference.hpp> + +#include <algorithm> +#include <numeric> +#include <functional> +#include <iostream> +#include <cmath> +#include <limits> + +#ifdef _MSC_VER +// inline aggressively +# pragma inline_recursion(on) // turn on inline recursion +# pragma inline_depth(255) // max inline depth +#endif + +int const REPEAT_COUNT = 10; + +double const duration = 0.5; + +namespace +{ + struct poly_add + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_add(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs + rhs; + } + }; + + struct poly_mult + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_mult(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs * rhs; + } + }; + + template<int N> + double time_for_std_inner_product(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + 
std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = std::inner_product(arr1.begin(), arr1.end(), arr2.begin(), 0); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } + + struct poly_combine + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_combine(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + typename result<poly_combine(Lhs,Rhs)>::type + operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs + boost::fusion::at_c<0>(rhs) * boost::fusion::at_c<1>(rhs); + } + }; + + template<int N> + double time_for_fusion_inner_product2(int& j) + { + boost::timer tim; + int i = 0; + long long iter = 65536; + long long counter, repeats; + double result = (std::numeric_limits<double>::max)(); + double runtime = 0; + double run; + boost::array<int, N> arr1; + boost::array<int, N> arr2; + std::generate(arr1.begin(), arr1.end(), rand); + std::generate(arr2.begin(), arr2.end(), rand); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::zip(arr1, arr2), 0, poly_combine()); + static_cast<void>(i); + } + runtime = tim.elapsed(); + iter *= 2; + } while(runtime < duration); + iter /= 2; + + std::cout << iter << " iterations" << std::endl; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; 
counter < iter; ++counter) + { + i = boost::fusion::accumulate( + boost::fusion::zip(arr1, arr2), 0, poly_combine()); + j += i; + } + run = tim.elapsed(); + result = (std::min)(run, result); + } + std::cout << i << std::endl; + return result / iter; + } +} + +int main() +{ + int total = 0; + int res = 0; // zero-initialised: the timing helpers add into it, so it must not start indeterminate + + std::cout << "short inner_product std test " << time_for_std_inner_product<8>(res) << std::endl; + total += res; + std::cout << "short inner_product fusion 2 test " << time_for_fusion_inner_product2<8>(res) << std::endl; + total += res; + + std::cout << "medium inner_product std test " << time_for_std_inner_product<64>(res) << std::endl; + total += res; + std::cout << "medium inner_product fusion 2 test " << time_for_fusion_inner_product2<64>(res) << std::endl; + total += res; + +#if 0 // Leads to ICE with MSVC 8.0 + std::cout << "long inner_product std test " << time_for_std_inner_product<128>(res) << std::endl; + total += res; + std::cout << "long inner_product fusion 2 test " << time_for_fusion_inner_product2<128>(res) << std::endl; + total += res; +#endif + + return total; +} diff --git a/src/boost/libs/fusion/example/performance/measure.hpp b/src/boost/libs/fusion/example/performance/measure.hpp new file mode 100644 index 00000000..72cd71ba --- /dev/null +++ b/src/boost/libs/fusion/example/performance/measure.hpp @@ -0,0 +1,85 @@ +// Copyright David Abrahams, Matthias Troyer, Michael Gauckler +// 2005. Distributed under the Boost Software License, Version +// 1.0. (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +#if !defined(LIVE_CODE_TYPE) +# define LIVE_CODE_TYPE int +#endif + +#include <boost/timer.hpp> + +namespace test +{ + // This value is required to ensure that a smart compiler's dead + // code elimination doesn't optimize away anything we're testing. + // We'll use it to compute the return code of the executable to make + // sure it's needed. 
+ LIVE_CODE_TYPE live_code; + + // Call objects of the given Accumulator type repeatedly with x as + // an argument. + template <class Accumulator, class Arg> + void hammer(Arg const& x, long const repeats) + { + // Strategy: because the sum in an accumulator after each call + // depends on the previous value of the sum, the CPU's pipeline + // might be stalled while waiting for the previous addition to + // complete. Therefore, we allocate an array of accumulators, + // and update them in sequence, so that there's no dependency + // between adjacent addition operations. + // + // Additionally, if there were only one accumulator, the + // compiler or CPU might decide to update the value in a + // register rather than writing it back to memory. We want each + // operation to at least update the L1 cache. *** Note: This + // concern is specific to the particular application at which + // we're targeting the test. *** + + // This has to be at least as large as the number of + // simultaneous accumulations that can be executing in the + // compiler pipeline. A safe number here is larger than the + // machine's maximum pipeline depth. If you want to test the L2 + // or L3 cache, or main memory, you can increase the size of + // this array. 1024 is an upper limit on the pipeline depth of + // current vector machines. + const std::size_t number_of_accumulators = 1024; + live_code = 0; // reset to zero + + Accumulator a[number_of_accumulators]; + + for (long iteration = 0; iteration < repeats; ++iteration) + { + for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) + { + (*ap)(x); + } + } + + // Accumulate all the partial sums to avoid dead code + // elimination. + for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) + { + live_code += ap->sum; + } + } + + // Measure the time required to hammer accumulators of the given + // type with the argument x. 
+ template <class Accumulator, class T> + double measure(T const& x, long const repeats) + { + // Hammer accumulators a couple of times to ensure the + // instruction cache is full of our test code, and that we don't + // measure the cost of a page fault for accessing the data page + // containing the memory where the accumulators will be + // allocated + hammer<Accumulator>(x, repeats); + hammer<Accumulator>(x, repeats); + + // Now start a timer + boost::timer time; + hammer<Accumulator>(x, repeats); // This time, we'll measure + return time.elapsed() / repeats; // return the time of one iteration + } +} diff --git a/src/boost/libs/fusion/example/performance/sequence_efficiency.cpp b/src/boost/libs/fusion/example/performance/sequence_efficiency.cpp new file mode 100644 index 00000000..307ecdf3 --- /dev/null +++ b/src/boost/libs/fusion/example/performance/sequence_efficiency.cpp @@ -0,0 +1,248 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ +#include "measure.hpp" + +#define FUSION_MAX_LIST_SIZE 30 +#define FUSION_MAX_VECTOR_SIZE 30 + +#include <boost/fusion/algorithm/iteration/accumulate.hpp> +#include <boost/fusion/container/vector.hpp> +#include <boost/fusion/container/list.hpp> + +#include <boost/type_traits/remove_reference.hpp> + +#include <boost/lexical_cast.hpp> +#include <boost/preprocessor/stringize.hpp> +#include <boost/preprocessor/enum.hpp> + +#include <iostream> + +#ifdef _MSC_VER +// inline aggressively +# pragma inline_recursion(on) // turn on inline recursion +# pragma inline_depth(255) // max inline depth +#endif + +// About the tests: +// +// The tests below compare various fusion sequences to see how abstraction +// affects performance. 
+// +// We have 3 sequence sizes for each fusion sequence we're going to test. +// +// small = 3 elements +// medium = 10 elements +// big = 30 elements +// +// The sequences are initialized with values 0..N-1 from numeric strings +// parsed by boost::lexical_cast to make sure that the compiler is not +// optimizing by replacing the computation with constant results computed +// at compile time. +// +// These sequences will be subjected to our accumulator which calls +// fusion::accumulate: +// +// this->sum += boost::fusion::accumulate(seq, 0, poly_add()); +// +// where poly_add simply sums the current value with the content of +// the sequence element. This accumulator will be called many times +// through the "hammer" test (see measure.hpp). +// +// The tests are compared against a base using a plain_accumulator +// which does a simple addition: +// +// this->sum += x; + +namespace +{ + struct poly_add + { + template<typename Sig> + struct result; + + template<typename Lhs, typename Rhs> + struct result<poly_add(Lhs, Rhs)> + : boost::remove_reference<Lhs> + {}; + + template<typename Lhs, typename Rhs> + Lhs operator()(const Lhs& lhs, const Rhs& rhs) const + { + return lhs + rhs; + } + }; + + // Our Accumulator function + template <typename T> + struct accumulator + { + accumulator() + : sum() + {} + + template <typename Sequence> + void operator()(Sequence const& seq) + { + this->sum += boost::fusion::accumulate(seq, 0, poly_add()); + } + + T sum; + }; + + // Plain Accumulator function + template <typename T> + struct plain_accumulator + { + plain_accumulator() + : sum() + {} + + template <typename X> + void operator()(X const& x) + { + this->sum += x; + } + + T sum; + }; + + template <typename T> + void check(T const& seq, char const* info) + { + test::measure<accumulator<int> >(seq, 1); + std::cout << info << test::live_code << std::endl; + } + + template <typename T> + void measure(T const& seq, char const* info, long const repeats, double base) + { + double 
t = test::measure<accumulator<int> >(seq, repeats); + std::cout + << info + << t + << " (" << int((t/base)*100) << "%)" + << std::endl; + } + + template <typename T> + void test_assembler(T const& seq) + { + test::live_code = boost::fusion::accumulate(seq, 0, poly_add()); + } +} + +// We'll initialize the sequences from numeric strings that +// pass through boost::lexical_cast to make sure that the +// compiler is not optimizing by replacing the computation +// with constant results computed at compile time. +#define INIT(z, n, text) boost::lexical_cast<int>(BOOST_PP_STRINGIZE(n)) + +int main() +{ + using namespace boost::fusion; + std::cout.setf(std::ios::scientific); + + vector< + int, int, int + > + vsmall(BOOST_PP_ENUM(3, INIT, _)); + + list< + int, int, int + > + lsmall(BOOST_PP_ENUM(3, INIT, _)); + + vector< + int, int, int, int, int, int, int, int, int, int + > + vmedium(BOOST_PP_ENUM(10, INIT, _)); + + list< + int, int, int, int, int, int, int, int, int, int + > + lmedium(BOOST_PP_ENUM(10, INIT, _)); + + vector< + int, int, int, int, int, int, int, int, int, int + , int, int, int, int, int, int, int, int, int, int + , int, int, int, int, int, int, int, int, int, int + > + vbig(BOOST_PP_ENUM(30, INIT, _)); + + list< + int, int, int, int, int, int, int, int, int, int + , int, int, int, int, int, int, int, int, int, int + , int, int, int, int, int, int, int, int, int, int + > + lbig(BOOST_PP_ENUM(30, INIT, _)); + + // first decide how many repetitions to measure + long repeats = 100; + double measured = 0; + while (measured < 2.0 && repeats <= 10000000) + { + repeats *= 10; + + boost::timer time; + + test::hammer<plain_accumulator<int> >(0, repeats); + test::hammer<accumulator<int> >(vsmall, repeats); + test::hammer<accumulator<int> >(lsmall, repeats); + test::hammer<accumulator<int> >(vmedium, repeats); + test::hammer<accumulator<int> >(lmedium, repeats); + test::hammer<accumulator<int> >(vbig, repeats); + test::hammer<accumulator<int> >(lbig, repeats); + + 
measured = time.elapsed(); + } + + test::measure<plain_accumulator<int> >(1, 1); + std::cout + << "base accumulated result: " + << test::live_code + << std::endl; + + double base_time = test::measure<plain_accumulator<int> >(1, repeats); + std::cout + << "base time: " + << base_time; + + std::cout + << std::endl + << "-------------------------------------------------------------------" + << std::endl; + + check(vsmall, "small vector accumulated result: "); + check(lsmall, "small list accumulated result: "); + check(vmedium, "medium vector accumulated result: "); + check(lmedium, "medium list accumulated result: "); + check(vbig, "big vector accumulated result: "); + check(lbig, "big list accumulated result: "); + + std::cout + << "-------------------------------------------------------------------" + << std::endl; + + measure(vsmall, "small vector time: ", repeats, base_time); + measure(lsmall, "small list time: ", repeats, base_time); + measure(vmedium, "medium vector time: ", repeats, base_time); + measure(lmedium, "medium list time: ", repeats, base_time); + measure(vbig, "big vector time: ", repeats, base_time); + measure(lbig, "big list time: ", repeats, base_time); + + std::cout + << "-------------------------------------------------------------------" + << std::endl; + + // Let's see how this looks in assembler + test_assembler(vmedium); + + // This is ultimately responsible for preventing all the test code + // from being optimized away. Change this to return 0 and you + // unplug the whole test's life support system. 
+ return test::live_code != 0; +} diff --git a/src/boost/libs/fusion/example/performance/timings.txt b/src/boost/libs/fusion/example/performance/timings.txt new file mode 100644 index 00000000..35549170 --- /dev/null +++ b/src/boost/libs/fusion/example/performance/timings.txt @@ -0,0 +1,57 @@ +=============================================================================== +Copyright (C) 2001-2007 Joel de Guzman, Dan Marsden, Tobias Schwinger + +Use, modification and distribution is subject to the Boost Software +License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at +http://www.boost.org/LICENSE_1_0.txt) +=============================================================================== + +Timing result for sequence_efficiency.cpp comparing the speed of various +fusion sequences. The test involves accumulating the elements of the +sequence which is primed to have values 0..N (N=size of sequence). Small, +medium and big sequences are tested where: + + small = 3 elements + medium = 10 elements + big = 30 elements + +Tester: Joel de Guzman. 
WinXP, P4-3.0GHZ, 2GB RAM + +VC7.1 (flags = /MD /O2 /EHsc /GS) + + small vector time: 1.870000e-006 + small list time: 1.870000e-006 + medium vector time: 1.880000e-006 + medium list time: 3.600000e-006 + big vector time: 2.030000e-006 + big list time: 8.910000e-006 + +VC8.0 (flags = /MD /O2 /EHsc /GS) + + small vector time: 2.500000e-05 + small list time: 2.500000e-05 + medium vector time: 7.810000e-05 + medium list time: 7.810000e-05 + big vector time: 2.469000e-04 + big list time: 2.453000e-04 + +G++ 3.4 (flags = -ftemplate-depth-128 -funroll-loops -O3 -finline-functions -Wno-inline -Wall) + + small vector time: 2.500000e-05 + small list time: 2.500000e-05 + medium vector time: 7.970000e-05 + medium list time: 7.970000e-05 + big vector time: 2.516000e-04 + big list time: 2.485000e-04 + +Intel 9.1 (flags = /MD /O2 /EHsc /GS) + + small vector time: 1.125000e-006 + small list time: 1.125000e-006 + medium vector time: 1.125000e-006 + medium list time: 1.141000e-006 + big vector time: 1.140000e-006 + big list time: 1.141000e-006 + + + diff --git a/src/boost/libs/fusion/example/performance/zip_efficiency.cpp b/src/boost/libs/fusion/example/performance/zip_efficiency.cpp new file mode 100644 index 00000000..6d240f2c --- /dev/null +++ b/src/boost/libs/fusion/example/performance/zip_efficiency.cpp @@ -0,0 +1,155 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +==============================================================================*/ +#include "measure.hpp" + +//~ #define FUSION_MAX_VECTOR_SIZE 30 + +#include <boost/fusion/algorithm/iteration/accumulate.hpp> +#include <boost/fusion/algorithm/transformation/zip.hpp> +#include <boost/fusion/container/vector.hpp> +#include <boost/fusion/sequence/intrinsic/value_at.hpp> +#include <boost/fusion/sequence/intrinsic/at.hpp> +#include <boost/type_traits/remove_reference.hpp> +#include <iostream> + +#ifdef _MSC_VER +// inline aggressively +# pragma inline_recursion(on) // turn on inline recursion +# pragma inline_depth(255) // max inline depth +#endif + +namespace +{ + struct zip_add + { + template<typename Lhs, typename Rhs> + struct result + { + typedef typename + boost::remove_reference< + typename boost::fusion::result_of::value_at_c<Lhs, 0>::type + >::type + type; + }; + + template<typename Lhs, typename Rhs> + typename result<Lhs, Rhs>::type + operator()(const Lhs& lhs, const Rhs& rhs) const + { + return boost::fusion::at_c<0>(lhs) + boost::fusion::at_c<1>(lhs) + rhs; + } + }; + + // Our Accumulator function + template <typename T> + struct zip_accumulator + { + zip_accumulator() + : sum() + {} + + template <typename Sequence> + void operator()(Sequence const& seq) + { + this->sum += boost::fusion::accumulate(seq, 0, zip_add()); + } + + T sum; + }; + + template <typename T> + void check(T const& seq, char const* info) + { + test::measure<zip_accumulator<int> >(seq, 1); + std::cout << info << test::live_code << std::endl; + } + + template <typename T> + void measure(T const& seq, char const* info, long const repeats) + { + std::cout + << info + << test::measure<zip_accumulator<int> >(seq, repeats) + << std::endl; + } +} + +int main() +{ + using namespace boost::fusion; + + std::cout.setf(std::ios::scientific); + + vector< + int, int, int + > + vsmall_1(BOOST_PP_ENUM_PARAMS(3,)); + 
+ vector< + int, int, int + > + vsmall_2(BOOST_PP_ENUM_PARAMS(3,)); + + vector< + int, int, int, int, int, int, int, int, int, int + > + vmedium_1(BOOST_PP_ENUM_PARAMS(10,)); + + vector< + int, int, int, int, int, int, int, int, int, int + > + vmedium_2(BOOST_PP_ENUM_PARAMS(10,)); + + //~ vector< + //~ int, int, int, int, int, int, int, int, int, int + //~ , int, int, int, int, int, int, int, int, int, int + //~ , int, int, int, int, int, int, int, int, int, int + //~ > + //~ vbig_1(BOOST_PP_ENUM_PARAMS(30,)); + + //~ vector< + //~ int, int, int, int, int, int, int, int, int, int + //~ , int, int, int, int, int, int, int, int, int, int + //~ , int, int, int, int, int, int, int, int, int, int + //~ > + //~ vbig_2(BOOST_PP_ENUM_PARAMS(30,)); + + // first decide how many repetitions to measure + long repeats = 100; + double measured = 0; + while (measured < 2.0 && repeats <= 10000000) + { + repeats *= 10; + + boost::timer time; + + test::hammer<zip_accumulator<int> >(zip(vsmall_1, vsmall_2), repeats); + test::hammer<zip_accumulator<int> >(zip(vmedium_1, vmedium_2), repeats); + //~ test::hammer<zip_accumulator<int> >(zip(vbig_1, vbig_2), repeats); + + measured = time.elapsed(); + } + + check(zip(vsmall_1, vsmall_2), + "small zip accumulated result: "); + check(zip(vmedium_1, vmedium_2), + "medium zip accumulated result: "); + //~ check(zip(vbig_1, vbig_2), + //~ "big zip accumulated result: "); + + measure(zip(vsmall_1, vsmall_2), + "small zip time: ", repeats); + measure(zip(vmedium_1, vmedium_2), + "medium zip time: ", repeats); + //~ measure(zip(vbig_1, vbig_2), + //~ "big zip time: ", repeats); + + // This is ultimately responsible for preventing all the test code + // from being optimized away. Change this to return 0 and you + // unplug the whole test's life support system. + return test::live_code != 0; +} |