From 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 20:24:20 +0200 Subject: Adding upstream version 14.2.21. Signed-off-by: Daniel Baumann --- src/boost/libs/compute/perf/CMakeLists.txt | 210 ++++++++++++++++++ src/boost/libs/compute/perf/perf.hpp | 109 ++++++++++ src/boost/libs/compute/perf/perf.py | 238 +++++++++++++++++++++ src/boost/libs/compute/perf/perf_accumulate.cpp | 140 ++++++++++++ .../compute/perf/perf_bernoulli_distribution.cpp | 46 ++++ src/boost/libs/compute/perf/perf_binary_find.cpp | 71 ++++++ .../libs/compute/perf/perf_bolt_accumulate.cpp | 51 +++++ src/boost/libs/compute/perf/perf_bolt_count.cpp | 57 +++++ .../libs/compute/perf/perf_bolt_exclusive_scan.cpp | 52 +++++ src/boost/libs/compute/perf/perf_bolt_fill.cpp | 43 ++++ .../libs/compute/perf/perf_bolt_inner_product.cpp | 56 +++++ .../libs/compute/perf/perf_bolt_max_element.cpp | 69 ++++++ src/boost/libs/compute/perf/perf_bolt_merge.cpp | 60 ++++++ .../libs/compute/perf/perf_bolt_partial_sum.cpp | 53 +++++ .../libs/compute/perf/perf_bolt_reduce_by_key.cpp | 100 +++++++++ src/boost/libs/compute/perf/perf_bolt_saxpy.cpp | 76 +++++++ src/boost/libs/compute/perf/perf_bolt_sort.cpp | 50 +++++ src/boost/libs/compute/perf/perf_cart_to_polar.cpp | 158 ++++++++++++++ .../libs/compute/perf/perf_comparison_sort.cpp | 86 ++++++++ src/boost/libs/compute/perf/perf_copy_if.cpp | 122 +++++++++++ .../libs/compute/perf/perf_copy_to_device.cpp | 55 +++++ src/boost/libs/compute/perf/perf_count.cpp | 77 +++++++ .../compute/perf/perf_discrete_distribution.cpp | 48 +++++ src/boost/libs/compute/perf/perf_erase_remove.cpp | 61 ++++++ .../libs/compute/perf/perf_exclusive_scan.cpp | 97 +++++++++ src/boost/libs/compute/perf/perf_fill.cpp | 43 ++++ src/boost/libs/compute/perf/perf_find.cpp | 88 ++++++++ src/boost/libs/compute/perf/perf_find_end.cpp | 65 ++++++ src/boost/libs/compute/perf/perf_host_sort.cpp | 65 ++++++ 
src/boost/libs/compute/perf/perf_includes.cpp | 68 ++++++ src/boost/libs/compute/perf/perf_inner_product.cpp | 74 +++++++ .../libs/compute/perf/perf_is_permutation.cpp | 66 ++++++ src/boost/libs/compute/perf/perf_is_sorted.cpp | 63 ++++++ src/boost/libs/compute/perf/perf_max_element.cpp | 93 ++++++++ src/boost/libs/compute/perf/perf_merge.cpp | 69 ++++++ .../libs/compute/perf/perf_next_permutation.cpp | 65 ++++++ src/boost/libs/compute/perf/perf_nth_element.cpp | 60 ++++++ src/boost/libs/compute/perf/perf_partial_sum.cpp | 97 +++++++++ src/boost/libs/compute/perf/perf_partition.cpp | 66 ++++++ .../libs/compute/perf/perf_partition_point.cpp | 68 ++++++ .../libs/compute/perf/perf_prev_permutation.cpp | 65 ++++++ .../compute/perf/perf_random_number_engine.cpp | 101 +++++++++ src/boost/libs/compute/perf/perf_reduce_by_key.cpp | 114 ++++++++++ src/boost/libs/compute/perf/perf_reverse.cpp | 60 ++++++ src/boost/libs/compute/perf/perf_reverse_copy.cpp | 65 ++++++ src/boost/libs/compute/perf/perf_rotate.cpp | 60 ++++++ src/boost/libs/compute/perf/perf_rotate_copy.cpp | 62 ++++++ src/boost/libs/compute/perf/perf_saxpy.cpp | 162 ++++++++++++++ src/boost/libs/compute/perf/perf_search.cpp | 65 ++++++ src/boost/libs/compute/perf/perf_search_n.cpp | 61 ++++++ .../libs/compute/perf/perf_set_difference.cpp | 75 +++++++ .../libs/compute/perf/perf_set_intersection.cpp | 75 +++++++ .../compute/perf/perf_set_symmetric_difference.cpp | 75 +++++++ src/boost/libs/compute/perf/perf_set_union.cpp | 75 +++++++ src/boost/libs/compute/perf/perf_sort.cpp | 130 +++++++++++ src/boost/libs/compute/perf/perf_sort_by_key.cpp | 79 +++++++ src/boost/libs/compute/perf/perf_sort_float.cpp | 72 +++++++ .../libs/compute/perf/perf_stable_partition.cpp | 62 ++++++ .../libs/compute/perf/perf_stl_accumulate.cpp | 43 ++++ src/boost/libs/compute/perf/perf_stl_count.cpp | 45 ++++ src/boost/libs/compute/perf/perf_stl_find.cpp | 58 +++++ src/boost/libs/compute/perf/perf_stl_find_end.cpp | 44 ++++ 
src/boost/libs/compute/perf/perf_stl_includes.cpp | 48 +++++ .../libs/compute/perf/perf_stl_inner_product.cpp | 46 ++++ .../libs/compute/perf/perf_stl_is_permutation.cpp | 45 ++++ .../libs/compute/perf/perf_stl_max_element.cpp | 43 ++++ src/boost/libs/compute/perf/perf_stl_merge.cpp | 38 ++++ .../compute/perf/perf_stl_next_permutation.cpp | 43 ++++ .../libs/compute/perf/perf_stl_partial_sum.cpp | 51 +++++ src/boost/libs/compute/perf/perf_stl_partition.cpp | 46 ++++ .../libs/compute/perf/perf_stl_partition_point.cpp | 48 +++++ .../compute/perf/perf_stl_prev_permutation.cpp | 43 ++++ src/boost/libs/compute/perf/perf_stl_reverse.cpp | 41 ++++ .../libs/compute/perf/perf_stl_reverse_copy.cpp | 45 ++++ src/boost/libs/compute/perf/perf_stl_rotate.cpp | 41 ++++ .../libs/compute/perf/perf_stl_rotate_copy.cpp | 43 ++++ src/boost/libs/compute/perf/perf_stl_saxpy.cpp | 52 +++++ src/boost/libs/compute/perf/perf_stl_search.cpp | 44 ++++ src/boost/libs/compute/perf/perf_stl_search_n.cpp | 41 ++++ .../libs/compute/perf/perf_stl_set_difference.cpp | 54 +++++ .../compute/perf/perf_stl_set_intersection.cpp | 54 +++++ .../perf/perf_stl_set_symmetric_difference.cpp | 54 +++++ src/boost/libs/compute/perf/perf_stl_set_union.cpp | 54 +++++ src/boost/libs/compute/perf/perf_stl_sort.cpp | 33 +++ .../compute/perf/perf_stl_stable_partition.cpp | 47 ++++ src/boost/libs/compute/perf/perf_stl_unique.cpp | 41 ++++ .../libs/compute/perf/perf_stl_unique_copy.cpp | 44 ++++ .../libs/compute/perf/perf_tbb_accumulate.cpp | 75 +++++++ src/boost/libs/compute/perf/perf_tbb_merge.cpp | 95 ++++++++ src/boost/libs/compute/perf/perf_tbb_sort.cpp | 35 +++ .../libs/compute/perf/perf_thrust_accumulate.cu | 45 ++++ src/boost/libs/compute/perf/perf_thrust_count.cu | 49 +++++ .../compute/perf/perf_thrust_exclusive_scan.cu | 48 +++++ src/boost/libs/compute/perf/perf_thrust_find.cu | 65 ++++++ .../libs/compute/perf/perf_thrust_inner_product.cu | 49 +++++ src/boost/libs/compute/perf/perf_thrust_merge.cu | 63 ++++++ 
.../libs/compute/perf/perf_thrust_partial_sum.cu | 48 +++++ .../libs/compute/perf/perf_thrust_partition.cu | 60 ++++++ .../libs/compute/perf/perf_thrust_reduce_by_key.cu | 92 ++++++++ src/boost/libs/compute/perf/perf_thrust_reverse.cu | 48 +++++ .../libs/compute/perf/perf_thrust_reverse_copy.cu | 47 ++++ src/boost/libs/compute/perf/perf_thrust_rotate.cu | 51 +++++ src/boost/libs/compute/perf/perf_thrust_saxpy.cu | 63 ++++++ .../compute/perf/perf_thrust_set_difference.cu | 61 ++++++ src/boost/libs/compute/perf/perf_thrust_sort.cu | 48 +++++ src/boost/libs/compute/perf/perf_thrust_unique.cu | 50 +++++ .../compute/perf/perf_uniform_int_distribution.cpp | 46 ++++ src/boost/libs/compute/perf/perf_unique.cpp | 60 ++++++ src/boost/libs/compute/perf/perf_unique_copy.cpp | 61 ++++++ src/boost/libs/compute/perf/perfdoc.py | 70 ++++++ 110 files changed, 7351 insertions(+) create mode 100644 src/boost/libs/compute/perf/CMakeLists.txt create mode 100644 src/boost/libs/compute/perf/perf.hpp create mode 100755 src/boost/libs/compute/perf/perf.py create mode 100644 src/boost/libs/compute/perf/perf_accumulate.cpp create mode 100644 src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp create mode 100644 src/boost/libs/compute/perf/perf_binary_find.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_accumulate.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_count.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_fill.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_inner_product.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_max_element.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_merge.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp create mode 100644 src/boost/libs/compute/perf/perf_bolt_saxpy.cpp create mode 100644 
src/boost/libs/compute/perf/perf_bolt_sort.cpp create mode 100644 src/boost/libs/compute/perf/perf_cart_to_polar.cpp create mode 100644 src/boost/libs/compute/perf/perf_comparison_sort.cpp create mode 100644 src/boost/libs/compute/perf/perf_copy_if.cpp create mode 100644 src/boost/libs/compute/perf/perf_copy_to_device.cpp create mode 100644 src/boost/libs/compute/perf/perf_count.cpp create mode 100644 src/boost/libs/compute/perf/perf_discrete_distribution.cpp create mode 100644 src/boost/libs/compute/perf/perf_erase_remove.cpp create mode 100644 src/boost/libs/compute/perf/perf_exclusive_scan.cpp create mode 100644 src/boost/libs/compute/perf/perf_fill.cpp create mode 100644 src/boost/libs/compute/perf/perf_find.cpp create mode 100644 src/boost/libs/compute/perf/perf_find_end.cpp create mode 100644 src/boost/libs/compute/perf/perf_host_sort.cpp create mode 100644 src/boost/libs/compute/perf/perf_includes.cpp create mode 100644 src/boost/libs/compute/perf/perf_inner_product.cpp create mode 100644 src/boost/libs/compute/perf/perf_is_permutation.cpp create mode 100644 src/boost/libs/compute/perf/perf_is_sorted.cpp create mode 100644 src/boost/libs/compute/perf/perf_max_element.cpp create mode 100644 src/boost/libs/compute/perf/perf_merge.cpp create mode 100644 src/boost/libs/compute/perf/perf_next_permutation.cpp create mode 100644 src/boost/libs/compute/perf/perf_nth_element.cpp create mode 100644 src/boost/libs/compute/perf/perf_partial_sum.cpp create mode 100644 src/boost/libs/compute/perf/perf_partition.cpp create mode 100644 src/boost/libs/compute/perf/perf_partition_point.cpp create mode 100644 src/boost/libs/compute/perf/perf_prev_permutation.cpp create mode 100644 src/boost/libs/compute/perf/perf_random_number_engine.cpp create mode 100644 src/boost/libs/compute/perf/perf_reduce_by_key.cpp create mode 100644 src/boost/libs/compute/perf/perf_reverse.cpp create mode 100644 src/boost/libs/compute/perf/perf_reverse_copy.cpp create mode 100644 
src/boost/libs/compute/perf/perf_rotate.cpp create mode 100644 src/boost/libs/compute/perf/perf_rotate_copy.cpp create mode 100644 src/boost/libs/compute/perf/perf_saxpy.cpp create mode 100644 src/boost/libs/compute/perf/perf_search.cpp create mode 100644 src/boost/libs/compute/perf/perf_search_n.cpp create mode 100644 src/boost/libs/compute/perf/perf_set_difference.cpp create mode 100644 src/boost/libs/compute/perf/perf_set_intersection.cpp create mode 100644 src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp create mode 100644 src/boost/libs/compute/perf/perf_set_union.cpp create mode 100644 src/boost/libs/compute/perf/perf_sort.cpp create mode 100644 src/boost/libs/compute/perf/perf_sort_by_key.cpp create mode 100644 src/boost/libs/compute/perf/perf_sort_float.cpp create mode 100644 src/boost/libs/compute/perf/perf_stable_partition.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_accumulate.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_count.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_find.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_find_end.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_includes.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_inner_product.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_is_permutation.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_max_element.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_merge.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_next_permutation.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_partial_sum.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_partition.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_partition_point.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_reverse.cpp create mode 100644 
src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_rotate.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_saxpy.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_search.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_search_n.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_set_difference.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_set_intersection.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_set_union.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_sort.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_stable_partition.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_unique.cpp create mode 100644 src/boost/libs/compute/perf/perf_stl_unique_copy.cpp create mode 100644 src/boost/libs/compute/perf/perf_tbb_accumulate.cpp create mode 100644 src/boost/libs/compute/perf/perf_tbb_merge.cpp create mode 100644 src/boost/libs/compute/perf/perf_tbb_sort.cpp create mode 100644 src/boost/libs/compute/perf/perf_thrust_accumulate.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_count.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_find.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_inner_product.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_merge.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_partial_sum.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_partition.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_reverse.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu create mode 100644 
src/boost/libs/compute/perf/perf_thrust_rotate.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_saxpy.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_set_difference.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_sort.cu create mode 100644 src/boost/libs/compute/perf/perf_thrust_unique.cu create mode 100644 src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp create mode 100644 src/boost/libs/compute/perf/perf_unique.cpp create mode 100644 src/boost/libs/compute/perf/perf_unique_copy.cpp create mode 100755 src/boost/libs/compute/perf/perfdoc.py (limited to 'src/boost/libs/compute/perf') diff --git a/src/boost/libs/compute/perf/CMakeLists.txt b/src/boost/libs/compute/perf/CMakeLists.txt new file mode 100644 index 00000000..b04429e9 --- /dev/null +++ b/src/boost/libs/compute/perf/CMakeLists.txt @@ -0,0 +1,210 @@ +# --------------------------------------------------------------------------- +# Copyright (c) 2013 Kyle Lutz +# +# Distributed under the Boost Software License, Version 1.0 +# See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt +# +# --------------------------------------------------------------------------- + +include_directories(../include) + +set(PERF_BOOST_COMPONENTS system timer chrono program_options) + +if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) + set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} filesystem) +endif() + +if(${BOOST_COMPUTE_THREAD_SAFE} AND NOT ${BOOST_COMPUTE_USE_CPP11}) + set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} thread) +elseif(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11}) + set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} thread) +endif() + +if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11}) + set(PERF_BOOST_COMPONENTS ${PERF_BOOST_COMPONENTS} date_time) +endif() + +if(PERF_BOOST_COMPONENTS) + list(REMOVE_DUPLICATES PERF_BOOST_COMPONENTS) +endif() +find_package(Boost 1.54 REQUIRED COMPONENTS 
${PERF_BOOST_COMPONENTS}) +include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) + +set(BENCHMARKS + accumulate + bernoulli_distribution + binary_find + cart_to_polar + comparison_sort + copy_if + copy_to_device + count + discrete_distribution + erase_remove + exclusive_scan + fill + find + find_end + includes + inner_product + is_permutation + is_sorted + max_element + merge + next_permutation + nth_element + partial_sum + partition + partition_point + prev_permutation + reverse + reverse_copy + rotate + rotate_copy + host_sort + random_number_engine + reduce_by_key + saxpy + search + search_n + set_difference + set_intersection + set_symmetric_difference + set_union + sort + sort_by_key + sort_float + stable_partition + uniform_int_distribution + unique + unique_copy +) + +foreach(BENCHMARK ${BENCHMARKS}) + set(PERF_TARGET perf_${BENCHMARK}) + add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) + target_link_libraries(${PERF_TARGET} ${OpenCL_LIBRARIES} ${Boost_LIBRARIES}) +endforeach() + +# stl benchmarks (for comparison) +set(STL_BENCHMARKS + stl_accumulate + stl_count + stl_find + stl_find_end + stl_includes + stl_inner_product + stl_max_element + stl_merge + stl_next_permutation + stl_partial_sum + stl_partition + stl_prev_permutation + stl_reverse + stl_reverse_copy + stl_rotate + stl_rotate_copy + stl_saxpy + stl_search + stl_search_n + stl_set_difference + stl_set_intersection + stl_set_symmetric_difference + stl_set_union + stl_sort + stl_stable_partition + stl_unique + stl_unique_copy +) + +# stl benchmarks which require c++11 +if(${BOOST_COMPUTE_USE_CPP11}) + list(APPEND + STL_BENCHMARKS + stl_is_permutation + stl_partition_point + ) +endif() + +foreach(BENCHMARK ${STL_BENCHMARKS}) + set(PERF_TARGET perf_${BENCHMARK}) + add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) + target_link_libraries(${PERF_TARGET} ${Boost_LIBRARIES}) +endforeach() + +# cuda/thrust benchmarks (for comparison) +if(${BOOST_COMPUTE_HAVE_CUDA}) + find_package(CUDA 5.0 REQUIRED) + + 
set(CUDA_BENCHMARKS + thrust_accumulate + thrust_count + thrust_exclusive_scan + thrust_find + thrust_inner_product + thrust_merge + thrust_partial_sum + thrust_partition + thrust_reduce_by_key + thrust_reverse + thrust_reverse_copy + thrust_rotate + thrust_saxpy + thrust_set_difference + thrust_sort + thrust_unique + ) + + foreach(BENCHMARK ${CUDA_BENCHMARKS}) + set(PERF_TARGET perf_${BENCHMARK}) + cuda_add_executable(${PERF_TARGET} perf_${BENCHMARK}.cu) + target_link_libraries(${PERF_TARGET} ${CUDA_LIBRARIES} ${Boost_LIBRARIES}) + endforeach() +endif() + +# intel tbb benchmarks (for comparison) +if(${BOOST_COMPUTE_HAVE_TBB}) + find_package(TBB REQUIRED) + include_directories(SYSTEM ${TBB_INCLUDE_DIRS}) + + set(TBB_BENCHMARKS + tbb_accumulate + tbb_merge + tbb_sort + ) + + foreach(BENCHMARK ${TBB_BENCHMARKS}) + set(PERF_TARGET perf_${BENCHMARK}) + add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) + target_link_libraries(${PERF_TARGET} ${TBB_LIBRARIES} ${Boost_LIBRARIES}) + endforeach() +endif() + +# bolt c++ template lib benchmarks (for comparison) +if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11}) + find_package(Bolt REQUIRED) + include_directories(SYSTEM ${BOLT_INCLUDE_DIRS}) + + set(BOLT_BENCHMARKS + bolt_accumulate + bolt_count + bolt_exclusive_scan + bolt_fill + bolt_inner_product + bolt_max_element + bolt_merge + bolt_partial_sum + bolt_reduce_by_key + bolt_saxpy + bolt_sort + ) + + foreach(BENCHMARK ${BOLT_BENCHMARKS}) + set(PERF_TARGET perf_${BENCHMARK}) + add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) + target_link_libraries(${PERF_TARGET} ${OpenCL_LIBRARIES} ${BOLT_LIBRARIES} ${Boost_LIBRARIES}) + endforeach() +elseif(${BOOST_COMPUTE_HAVE_BOLT} AND NOT ${BOOST_COMPUTE_USE_CPP11}) + message(WARNING "BOOST_COMPUTE_USE_CPP11 must be ON for building Bolt C++ Template Library performance tests.") +endif() diff --git a/src/boost/libs/compute/perf/perf.hpp b/src/boost/libs/compute/perf/perf.hpp new file mode 100644 index 00000000..cce0328c 
--- /dev/null +++ b/src/boost/libs/compute/perf/perf.hpp @@ -0,0 +1,109 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef PERF_HPP +#define PERF_HPP + +// this header contains general purpose functions and variables used by +// the boost.compute performance benchmarks. + +#include +#include +#include + +#include +#include + +static size_t PERF_N = 1024; +static size_t PERF_TRIALS = 3; + +// parses command line arguments and sets the corresponding perf variables +inline void perf_parse_args(int argc, char *argv[]) +{ + if(argc >= 2){ + PERF_N = boost::lexical_cast(argv[1]); + } + + if(argc >= 3){ + PERF_TRIALS = boost::lexical_cast(argv[2]); + } +} + +// generates a vector of random numbers +template +std::vector generate_random_vector(const size_t size) +{ + std::vector vector(size); + std::generate(vector.begin(), vector.end(), rand); + return vector; +} + +// a simple timer wrapper which records multiple time entries +class perf_timer +{ +public: + typedef boost::timer::nanosecond_type nanosecond_type; + + perf_timer() + { + timer.stop(); + } + + void start() + { + timer.start(); + } + + void stop() + { + timer.stop(); + times.push_back(timer.elapsed().wall); + } + + size_t trials() const + { + return times.size(); + } + + void clear() + { + times.clear(); + } + + nanosecond_type last_time() const + { + return times.back(); + } + + nanosecond_type min_time() const + { + return *std::min_element(times.begin(), times.end()); + } + + nanosecond_type max_time() const + { + return *std::max_element(times.begin(), times.end()); + } + + boost::timer::cpu_timer timer; + std::vector 
times; +}; + +// returns the rate (in MB/s) for processing 'count' items of type 'T' +// in 'time' nanoseconds +template +double perf_rate(const size_t count, perf_timer::nanosecond_type time) +{ + const size_t byte_count = count * sizeof(T); + + return (double(byte_count) / 1024 / 1024) / (time / 1e9); +} + +#endif // PERF_HPP diff --git a/src/boost/libs/compute/perf/perf.py b/src/boost/libs/compute/perf/perf.py new file mode 100755 index 00000000..c7b33f63 --- /dev/null +++ b/src/boost/libs/compute/perf/perf.py @@ -0,0 +1,238 @@ +#!/usr/bin/python + +# Copyright (c) 2014 Kyle Lutz +# Distributed under the Boost Software License, Version 1.0 +# See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt +# +# See http://boostorg.github.com/compute for more information. + +# driver script for boost.compute benchmarking. will run a +# benchmark for a given function (e.g. accumulate, sort). + +import os +import sys +import subprocess + +try: + import pylab +except: + print('pylab not found, no ploting...') + pass + +def run_perf_process(name, size, backend = ""): + if not backend: + proc = "perf_%s" % name + else: + proc = "perf_%s_%s" % (backend, name) + + filename = "./perf/" + proc + + if not os.path.isfile(filename): + print("Error: failed to find ", filename, " for running") + return 0 + try: + output = subprocess.check_output([filename, str(int(size))]) + except: + return 0 + + t = 0 + for line in output.decode('utf8').split("\n"): + if line.startswith("time:"): + t = float(line.split(":")[1].split()[0]) + + return t + +class Report: + def __init__(self, name): + self.name = name + self.samples = {} + + def add_sample(self, name, size, time): + if not name in self.samples: + self.samples[name] = [] + + self.samples[name].append((size, time)) + + def display(self): + for name in self.samples.keys(): + print('=== %s with %s ===' % (self.name, name)) + print('size,time (ms)') + + for sample in self.samples[name]: + print('%d,%f' % 
sample) + + def plot_time(self, name): + if not name in self.samples: + return + + x = [] + y = [] + + any_valid_samples = False + + for sample in self.samples[name]: + if sample[1] == 0: + continue + + x.append(sample[0]) + y.append(sample[1]) + any_valid_samples = True + + if not any_valid_samples: + return + + pylab.loglog(x, y, marker='o', label=name) + pylab.xlabel("Size") + pylab.ylabel("Time (ms)") + pylab.title(self.name) + + def plot_rate(self, name): + if not name in self.samples: + return + + x = [] + y = [] + + any_valid_samples = False + + for sample in self.samples[name]: + if sample[1] == 0: + continue + + x.append(sample[0]) + y.append(float(sample[0]) / (float(sample[1]) * 1e-3)) + any_valid_samples = True + + if not any_valid_samples: + return + + pylab.loglog(x, y, marker='o', label=name) + pylab.xlabel("Size") + pylab.ylabel("Rate (values/s)") + pylab.title(self.name) + +def run_benchmark(name, sizes, vs=[]): + report = Report(name) + + for size in sizes: + time = run_perf_process(name, size) + + report.add_sample("compute", size, time) + + competitors = { + "thrust" : [ + "accumulate", + "count", + "exclusive_scan", + "find", + "inner_product", + "merge", + "partial_sum", + "partition", + "reduce_by_key", + "reverse", + "reverse_copy", + "rotate", + "saxpy", + "sort", + "unique" + ], + "bolt" : [ + "accumulate", + "count", + "exclusive_scan", + "fill", + "inner_product", + "max_element", + "merge", + "partial_sum", + "reduce_by_key", + "saxpy", + "sort" + ], + "tbb": [ + "accumulate", + "merge", + "sort" + ], + "stl": [ + "accumulate", + "count", + "find", + "find_end", + "includes", + "inner_product", + "is_permutation", + "max_element", + "merge", + "next_permutation", + "nth_element", + "partial_sum", + "partition", + "partition_point", + "prev_permutation", + "reverse", + "reverse_copy", + "rotate", + "rotate_copy", + "saxpy", + "search", + "search_n", + "set_difference", + "set_intersection", + "set_symmetric_difference", + "set_union", + 
"sort", + "stable_partition", + "unique", + "unique_copy" + ] + } + + for other in vs: + if not other in competitors: + continue + if not name in competitors[other]: + continue + + for size in sizes: + time = run_perf_process(name, size, other) + report.add_sample(other, size, time) + + return report + +if __name__ == '__main__': + test = "sort" + if len(sys.argv) >= 2: + test = sys.argv[1] + print('running %s perf test' % test) + + sizes = [ pow(2, x) for x in range(1, 26) ] + + sizes = sorted(sizes) + + competitors = ["bolt", "tbb", "thrust", "stl"] + + report = run_benchmark(test, sizes, competitors) + + plot = None + if "--plot-time" in sys.argv: + plot = "time" + elif "--plot-rate" in sys.argv: + plot = "rate" + + if plot == "time": + report.plot_time("compute") + for competitor in competitors: + report.plot_time(competitor) + elif plot == "rate": + report.plot_rate("compute") + for competitor in competitors: + report.plot_rate(competitor) + + if plot: + pylab.legend(loc='upper left') + pylab.show() + else: + report.display() diff --git a/src/boost/libs/compute/perf/perf_accumulate.cpp b/src/boost/libs/compute/perf/perf_accumulate.cpp new file mode 100644 index 00000000..bd4276e6 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_accumulate.cpp @@ -0,0 +1,140 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "perf.hpp" + +namespace po = boost::program_options; +namespace compute = boost::compute; + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +template +double perf_accumulate(const compute::vector& data, + const size_t trials, + compute::command_queue& queue) +{ + perf_timer t; + for(size_t trial = 0; trial < trials; trial++){ + t.start(); + compute::accumulate(data.begin(), data.end(), T(0), queue); + queue.finish(); + t.stop(); + } + return t.min_time(); +} + +template +void tune_accumulate(const compute::vector& data, + const size_t trials, + compute::command_queue& queue) +{ + boost::shared_ptr + params = compute::detail::parameter_cache::get_global_cache(queue.get_device()); + + const std::string cache_key = + std::string("__boost_reduce_on_gpu_") + compute::type_name(); + + const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; + const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + + double min_time = (std::numeric_limits::max)(); + compute::uint_ best_tpb = 0; + compute::uint_ best_vpt = 0; + + for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){ + params->set(cache_key, "tpb", tpbs[i]); + for(size_t j = 0; j < sizeof(vpts) / sizeof(*vpts); j++){ + params->set(cache_key, "vpt", vpts[j]); + + try { + const double t = perf_accumulate(data, trials, queue); + if(t < min_time){ + best_tpb = tpbs[i]; + best_vpt = vpts[j]; + min_time = t; + } + } + catch(compute::opencl_error&){ + // invalid parameters for this device, skip + } + } + } + + // store optimal parameters + params->set(cache_key, "tpb", best_tpb); + params->set(cache_key, "vpt", best_vpt); +} + +int main(int argc, char *argv[]) +{ + // setup command line arguments + po::options_description options("options"); + options.add_options() + 
("help", "show usage instructions") + ("size", po::value()->default_value(8192), "input size") + ("trials", po::value()->default_value(3), "number of trials to run") + ("tune", "run tuning procedure") + ; + po::positional_options_description positional_options; + positional_options.add("size", 1); + + // parse command line + po::variables_map vm; + po::store( + po::command_line_parser(argc, argv) + .options(options).positional(positional_options).run(), + vm + ); + po::notify(vm); + + const size_t size = vm["size"].as(); + const size_t trials = vm["trials"].as(); + std::cout << "size: " << size << std::endl; + + // setup context and queue for the default device + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_data(size); + std::generate(host_data.begin(), host_data.end(), rand_int); + + // create vector on the device and copy the data + compute::vector device_data( + host_data.begin(), host_data.end(), queue + ); + + // run tuning proceure (if requested) + if(vm.count("tune")){ + tune_accumulate(device_data, trials, queue); + } + + // run benchmark + double t = perf_accumulate(device_data, trials, queue); + std::cout << "time: " << t / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp b/src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp new file mode 100644 index 00000000..bffbe58f --- /dev/null +++ b/src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp @@ -0,0 +1,46 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See 
http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +namespace compute = boost::compute; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + compute::vector vector(PERF_N, context); + + compute::default_random_engine rng(queue); + compute::bernoulli_distribution dist(0.5); + + perf_timer t; + t.start(); + dist.generate(vector.begin(), vector.end(), rng, queue); + queue.finish(); + t.stop(); + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_binary_find.cpp b/src/boost/libs/compute/perf/perf_binary_find.cpp new file mode 100644 index 00000000..ee7c4631 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_binary_find.cpp @@ -0,0 +1,71 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random value scaled into [0, 25]; 25 only when rand() == RAND_MAX
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+int main(int argc, char *argv[]) // benchmark: time detail::binary_find() on a partitioned device vector
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create vector on the device and copy the data
+    boost::compute::vector device_vector(PERF_N, context);
+    boost::compute::copy(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+
+    using boost::compute::_1;
+    boost::compute::partition( // elements < 20 go first, so the ">= 20" predicate searched below is false-then-true over the range
+        device_vector.begin(), device_vector.end(), _1 < 20, queue
+    );
+
+    // just to be sure everything is finished before measuring execution time
+    // of binary_find algorithm
+    queue.finish();
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        boost::compute::detail::binary_find( // result is discarded; only the elapsed time is of interest
+            device_vector.begin(), device_vector.end(), _1 >= 20, queue
+        );
+        queue.finish(); // finish the queue before stopping the timer
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_accumulate.cpp b/src/boost/libs/compute/perf/perf_bolt_accumulate.cpp
new file mode 100644
index 00000000..5a6b9b9c
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_accumulate.cpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+//
Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::reduce() over a device vector of PERF_N elements
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create host vector (generate_random_vector is not defined in this file; presumably perf.hpp)
+    std::vector host_vec = generate_random_vector(PERF_N);
+
+    // create device vector
+    bolt::cl::device_vector device_vec(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin());
+
+    int sum = 0;
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        sum = bolt::cl::reduce(device_vec.begin(), device_vec.end()); // result kept so it can be printed below
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+    std::cout << "sum: " << sum << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_count.cpp b/src/boost/libs/compute/perf/perf_bolt_count.cpp
new file mode 100644
index 00000000..a23c5ac3
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_count.cpp
@@ -0,0 +1,57 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random value scaled into [0, 25]; 25 only when rand() == RAND_MAX
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::count() of the value 4 in a device vector
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector h_vec(PERF_N);
+    std::generate(h_vec.begin(), h_vec.end(), rand_int);
+
+    // create device vector
+    bolt::cl::device_vector d_vec(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+    size_t count = 0;
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        count = bolt::cl::count(ctrl, d_vec.begin(), d_vec.end(), 4); // the control object is passed explicitly here
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+    std::cout << "count: " << count << std::endl; // printed only; not verified against a host-side count
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp b/src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp
new file mode 100644
index 00000000..a60e4655
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp
@@ -0,0 +1,52 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time an in-place bolt::cl::exclusive_scan() over PERF_N elements
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create vector of random numbers on the host (generate_random_vector is not defined in this file)
+    std::vector h_vec = generate_random_vector(PERF_N);
+
+    // create device vector
+    bolt::cl::device_vector d_vec(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // transfer data to the device (redone every trial because the scan overwrites d_vec; not timed)
+        bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+        t.start();
+        bolt::cl::exclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); // output iterator == input iterator: in place
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    // transfer data back to host (h_vec now holds the scanned values)
+    bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_fill.cpp b/src/boost/libs/compute/perf/perf_bolt_fill.cpp
new file mode 100644
index 00000000..50b6e85e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_fill.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::fill() over a device vector of PERF_N elements
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create device vector (filled with zeros)
+    bolt::cl::device_vector d_vec(PERF_N, 0);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        bolt::cl::fill(d_vec.begin(), d_vec.end(), int(trial)); // a different fill value is written on every trial
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_inner_product.cpp b/src/boost/libs/compute/perf/perf_bolt_inner_product.cpp
new file mode 100644
index 00000000..4c9652e6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_inner_product.cpp
@@ -0,0 +1,56 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::inner_product() of two PERF_N-element device vectors
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create host vectors (generate_random_vector is not defined in this file; presumably perf.hpp)
+    std::vector host_x = generate_random_vector(PERF_N);
+    std::vector host_y = generate_random_vector(PERF_N);
+
+    // create device vectors
+    bolt::cl::device_vector device_x(PERF_N);
+    bolt::cl::device_vector device_y(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_x.begin(), host_x.end(), device_x.begin());
+    bolt::cl::copy(host_y.begin(), host_y.end(), device_y.begin());
+
+    int product = 0;
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        product = bolt::cl::inner_product( // trailing 0 is the initial value of the accumulation
+            device_x.begin(), device_x.end(), device_y.begin(), 0
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+    std::cout << "product: " << product << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_max_element.cpp b/src/boost/libs/compute/perf/perf_bolt_max_element.cpp
new file mode 100644
index 00000000..238b1ba2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_max_element.cpp
@@ -0,0 +1,69 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int rand_int() // NOTE(review): not referenced anywhere in this file; the input below comes from generate_random_vector
+{
+    return static_cast(rand() % 10000000);
+}
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::max_element() and verify the result against std::max_element
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create host vector
+    std::vector host_vec = generate_random_vector(PERF_N);
+
+    // create device vector
+    bolt::cl::device_vector device_vec(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin());
+
+    bolt::cl::device_vector::iterator max_iter = device_vec.begin();
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        max_iter = bolt::cl::max_element(device_vec.begin(), device_vec.end());
+        t.stop();
+    }
+
+    int device_max = *max_iter; // dereferenced after the timed loop, so the read-back is not part of the measurement
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+    std::cout << "max: " << device_max << std::endl;
+
+    // verify max is correct
+    int host_max = *std::max_element(host_vec.begin(), host_vec.end());
+    if(device_max != host_max){
+        std::cout << "ERROR: "
+                  << "device_max (" << device_max << ") "
+                  << "!= "
+                  << "host_max (" << host_max << ")"
+                  << std::endl;
+        return -1; // non-zero exit status signals the mismatch
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_merge.cpp b/src/boost/libs/compute/perf/perf_bolt_merge.cpp
new file mode 100644
index 00000000..c5ee12ac
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_merge.cpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+//
See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::merge() of two sorted halves into one PERF_N-element range
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create vectors of random numbers on the host (floor + ceil halves add up to PERF_N)
+    std::vector host_vec1 = generate_random_vector(std::floor(PERF_N / 2.0));
+    std::vector host_vec2 = generate_random_vector(std::ceil(PERF_N / 2.0));
+    // sort them (merge requires sorted inputs)
+    std::sort(host_vec1.begin(), host_vec1.end());
+    std::sort(host_vec2.begin(), host_vec2.end());
+
+    // create device vectors sized to their data, so merge sees no unsorted padding and cannot overrun the output
+    bolt::cl::device_vector device_vec1(host_vec1.size());
+    bolt::cl::device_vector device_vec2(host_vec2.size());
+    bolt::cl::device_vector device_vec3(host_vec1.size() + host_vec2.size());
+
+    // transfer data to the device
+    bolt::cl::copy(host_vec1.begin(), host_vec1.end(), device_vec1.begin());
+    bolt::cl::copy(host_vec2.begin(), host_vec2.end(), device_vec2.begin());
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        bolt::cl::merge(
+            device_vec1.begin(), device_vec1.end(),
+            device_vec2.begin(), device_vec2.end(),
+            device_vec3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp b/src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp
new file mode 100644
index 00000000..2f9c830e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp
@@ -0,0 +1,53 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time an in-place bolt::cl::inclusive_scan() over PERF_N elements
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create vector of random numbers on the host (generate_random_vector is not defined in this file)
+    std::vector h_vec = generate_random_vector(PERF_N);
+
+    // create device vector
+    bolt::cl::device_vector d_vec(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // transfer data to the device (redone every trial because the scan overwrites d_vec; not timed)
+        bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+        t.start();
+        bolt::cl::inclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); // output iterator == input iterator: in place
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    // transfer data back to host (h_vec now holds the partial sums)
+    bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
+
+    return 0;
+}
+
diff --git a/src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp b/src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp
new file mode 100644
index 00000000..e7668498
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp
@@ -0,0 +1,100 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random value scaled into [0, 25]; 25 only when rand() == RAND_MAX
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+struct unique_key { // generator of non-decreasing keys: each call keeps or increments the current key
+    int current;
+    int avgValuesNoPerKey; // the key advances with probability 1/avgValuesNoPerKey per call
+
+    unique_key()
+    {
+        current = 0;
+        avgValuesNoPerKey = 512;
+    }
+
+    int operator()()
+    {
+        double p = double(1.0) / static_cast(avgValuesNoPerKey);
+        if((rand() / double(RAND_MAX)) <= p)
+            return ++current; // may fire on the very first call, so the first key is 0 or 1
+        return current;
+    }
+} UniqueKey;
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::reduce_by_key() and check the number of output keys
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create vector of keys and random values
+    std::vector host_keys(PERF_N);
+    std::vector host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create device vectors for data
+    bolt::cl::device_vector device_keys(PERF_N);
+    bolt::cl::device_vector device_values(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_keys.begin(), host_keys.end(), device_keys.begin());
+    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());
+
+    // create device vectors for the results
+    bolt::cl::device_vector device_keys_results(PERF_N);
+    bolt::cl::device_vector device_values_results(PERF_N);
+
+    typedef bolt::cl::device_vector::iterator iterType;
+    bolt::cl::pair result = {
+        device_keys_results.begin(),
+        device_values_results.begin()
+    };
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = bolt::cl::reduce_by_key(device_keys.begin(),
+                                         device_keys.end(),
+                                         device_values.begin(),
+                                         device_keys_results.begin(),
+                                         device_values_results.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    size_t result_size = bolt::cl::distance(device_keys_results.begin(), result.first); // keys are non-decreasing in steps of 1, so distinct keys = last - first + 1
+    if(result_size != static_cast(host_keys[PERF_N-1] - host_keys[0] + 1)){
+        std::cout << "ERROR: "
+                  << "wrong number of keys"
+                  << std::endl;
+        return -1; // non-zero exit status signals the mismatch
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_saxpy.cpp b/src/boost/libs/compute/perf/perf_bolt_saxpy.cpp
new file mode 100644
index 00000000..201f683a
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_saxpy.cpp
@@ -0,0 +1,76 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+BOLT_FUNCTOR(saxpy_functor,
+    struct saxpy_functor
+    {
+        float _a; // scale factor applied to x
+        saxpy_functor(float a) : _a(a) {};
+
+        float operator() (const float &x, const float &y) const
+        {
+            return _a * x + y;
+        };
+    };
+)
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::transform() computing y = 2.5 * x + y in place
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create host vectors
+    std::vector host_x(PERF_N);
+    std::vector host_y(PERF_N);
+    std::generate(host_x.begin(), host_x.end(), rand);
+    std::generate(host_y.begin(), host_y.end(), rand);
+
+    // create device vectors
+    bolt::cl::device_vector device_x(PERF_N);
+    bolt::cl::device_vector device_y(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_x.begin(), host_x.end(), device_x.begin());
+    bolt::cl::copy(host_y.begin(), host_y.end(), device_y.begin());
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        bolt::cl::transform( // output is device_y itself, so y is updated again on every trial
+            device_x.begin(), device_x.end(),
+            device_y.begin(),
+            device_y.begin(),
+            saxpy_functor(2.5f)
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    // transfer data back to host
+    bolt::cl::copy(device_x.begin(), device_x.end(), host_x.begin());
+    bolt::cl::copy(device_y.begin(), device_y.end(), host_y.begin());
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_sort.cpp b/src/boost/libs/compute/perf/perf_bolt_sort.cpp
new file mode 100644
index 00000000..2508ec06
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_sort.cpp
@@ -0,0 +1,50 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: time bolt::cl::sort() over a device vector of PERF_N elements
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    ::cl::Device device = bolt::cl::control::getDefault().getDevice();
+    std::cout << "device: " << device.getInfo() << std::endl;
+
+    // create host vector (generate_random_vector is not defined in this file; presumably perf.hpp)
+    std::vector h_vec = generate_random_vector(PERF_N);
+    // create device vector
+    bolt::cl::device_vector d_vec(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // transfer data to the device (redone every trial so each sort starts from unsorted input; not timed)
+        bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
+
+        t.start();
+        bolt::cl::sort(d_vec.begin(), d_vec.end());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    // transfer data back to host (h_vec now holds the sorted values)
+    bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_cart_to_polar.cpp b/src/boost/libs/compute/perf/perf_cart_to_polar.cpp
new file mode 100644
index 00000000..e0d333eb
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_cart_to_polar.cpp
@@ -0,0 +1,158 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#define _USE_MATH_DEFINES
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+using compute::float2_;
+
+float rand_float() // pseudo-random float in [0, 1000]
+{
+    return (float(rand()) / float(RAND_MAX)) * 1000.f;
+}
+
+void serial_cartesian_to_polar(const float *input, size_t n, float *output) // n interleaved (x, y) pairs -> (magnitude, angle in degrees); input may alias output
+{
+    for(size_t i = 0; i < n; i++){
+        float x = input[i*2+0];
+        float y = input[i*2+1];
+
+        float magnitude = std::sqrt(x*x + y*y);
+        float angle = std::atan2(y, x) * 180.f / M_PI; // radians -> degrees
+
+        output[i*2+0] = magnitude;
+        output[i*2+1] = angle;
+    }
+}
+
+void serial_polar_to_cartesian(const float *input, size_t n, float *output) // n interleaved (magnitude, angle) pairs -> (x, y); not called in this file
+{
+    for(size_t i = 0; i < n; i++){
+        float magnitude = input[i*2+0];
+        float angle = input[i*2+1];
+
+        float x = magnitude * cos(angle); // NOTE(review): cos/sin take radians, but serial_cartesian_to_polar emits degrees -- confirm intent
+        float y = magnitude * sin(angle);
+
+        output[i*2+0] = x;
+        output[i*2+1] = y;
+    }
+}
+
+// converts from cartesian coordinates (x, y) to polar coordinates (magnitude, angle)
+BOOST_COMPUTE_FUNCTION(float2_, cartesian_to_polar, (float2_ p),
+{
+    float x = p.x;
+    float y = p.y;
+
+    float magnitude = sqrt(x*x + y*y);
+    float angle = atan2(y, x) * 180.f / M_PI;
+
+    return (float2)(magnitude, angle);
+});
+
+// converts from polar coordinates (magnitude, angle) to cartesian coordinates (x, y)
+BOOST_COMPUTE_FUNCTION(float2_, polar_to_cartesian, (float2_ p),
+{
+    float magnitude = p.x;
+    float angle = p.y;
+
+    float x = magnitude * cos(angle);
+    float y = magnitude * sin(angle);
+
+    return (float2)(x, y);
+});
+
+int main(int argc, char *argv[]) // benchmark: time the device cartesian_to_polar transform, then compare with the serial host version
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    compute::device device = compute::system::default_device();
+    compute::context context(device);
+    compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host (two floats per point)
+    std::vector host_vector(PERF_N*2);
+    std::generate(host_vector.begin(), host_vector.end(), rand_float);
+
+    // create vector on the device and copy the data
+    compute::vector device_vector(PERF_N, context);
+    compute::copy_n(
+        reinterpret_cast(&host_vector[0]),
+        PERF_N,
+        device_vector.begin(),
+        queue
+    );
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        compute::transform( // in place: every trial converts the previous trial's output again
+            device_vector.begin(),
+            device_vector.end(),
+            device_vector.begin(),
+            cartesian_to_polar,
+            queue
+        );
+        queue.finish(); // finish the queue before stopping the timer
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    // perform the same conversion serially on the host (also in place, same number of trials)
+    t.clear();
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        serial_cartesian_to_polar(&host_vector[0], PERF_N, &host_vector[0]);
+        t.stop();
+    }
+    std::cout << "host time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    std::vector device_data(PERF_N*2);
+    compute::copy(
+        device_vector.begin(),
+        device_vector.end(),
+        reinterpret_cast(&device_data[0]),
+        queue
+    );
+
+    for(size_t i = 0; i < PERF_N*2; i++){ // compare every float: both buffers hold PERF_N*2 values
+        float host_value = host_vector[i];
+        float device_value = device_data[i];
+
+        if(std::abs(device_value - host_value) > 1e-3){
+            std::cout << "ERROR: "
+                      << "value at " << i << " "
+                      << "device_value (" << device_value << ") "
+                      << "!= "
+                      << "host_value (" << host_value << ")"
+                      << std::endl;
+            return -1; // non-zero exit status signals the mismatch
+        }
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_comparison_sort.cpp b/src/boost/libs/compute/perf/perf_comparison_sort.cpp
new file mode 100644
index 00000000..fa63303f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_comparison_sort.cpp
@@ -0,0 +1,86 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2016 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+//
http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+namespace po = boost::program_options;
+namespace compute = boost::compute;
+
+int main(int argc, char *argv[]) // benchmark: time boost::compute::sort() with a user-supplied comparison function
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    using boost::compute::int_;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand);
+
+    // create vector on the device (the data is copied in before each trial)
+    boost::compute::vector device_vector(PERF_N, context);
+
+    // less-than comparison for int_ values
+    BOOST_COMPUTE_FUNCTION(bool, comp, (int_ a, int_ b),
+    {
+        return a < b;
+    });
+
+    // sort vector
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        boost::compute::copy( // restore the unsorted input; not timed
+            host_vector.begin(),
+            host_vector.end(),
+            device_vector.begin(),
+            queue
+        );
+        queue.finish(); // make sure the upload is done before the timer starts
+
+        t.start();
+        boost::compute::sort(
+            device_vector.begin(),
+            device_vector.end(),
+            comp,
+            queue
+        );
+        queue.finish(); // finish the queue before stopping the timer
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+
+    // verify vector is sorted
+    if(!boost::compute::is_sorted(device_vector.begin(),
+                                  device_vector.end(),
+                                  comp,
+                                  queue)){
+        std::cout << "ERROR: is_sorted() returned false" << std::endl;
+        return -1; // non-zero exit status signals the failure
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_copy_if.cpp b/src/boost/libs/compute/perf/perf_copy_if.cpp
new file mode 100644
index 00000000..b2c162f6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_copy_if.cpp
@@ -0,0 +1,122 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+void test_copy_if_odd(compute::command_queue &queue) // times copy_if() with an "is odd" predicate on random ints in 1..10
+{
+    // create input and output vectors on the device
+    const compute::context &context = queue.get_context();
+    compute::vector input(PERF_N, context);
+    compute::vector output(PERF_N, context);
+
+    // generate random numbers between 1 and 10
+    compute::default_random_engine rng(queue);
+    compute::uniform_int_distribution d(1, 10);
+    d.generate(input.begin(), input.end(), rng, queue);
+
+    BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x),
+    {
+        return x & 1;
+    });
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        compute::vector::iterator i = compute::copy_if(
+            input.begin(), input.end(), output.begin(), is_odd, queue
+        );
+        queue.finish(); // finish the queue before stopping the timer
+        t.stop();
+
+        float ratio = float(std::distance(output.begin(), i)) / PERF_N; // fraction of elements that were copied
+        if(PERF_N > 1000 && (ratio < 0.45f || ratio > 0.55f)){ // sanity check only: it warns on stderr but does not abort
+            std::cerr << "error: ratio is " << ratio << std::endl;
+            std::cerr << "error: ratio should be around 45-55%" << std::endl;
+        }
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // best trial; /1e6 implies min_time() is presumably in ns
+}
+
+void test_copy_if_in_sphere(compute::command_queue &queue) // times copy_if() with an "inside sphere" closure; NOTE(review): never called from main() below
+{
+    using boost::compute::float4_;
+
+    // create input and output vectors on the device
+    const compute::context &context = queue.get_context();
+    compute::vector input_points(PERF_N, context);
+    compute::vector output_points(PERF_N, context);
+
+    // generate random numbers in a cube
+    float radius = 5.0f;
+    compute::default_random_engine rng(queue);
+    compute::uniform_real_distribution d(-radius, +radius);
+    d.generate( // fills the buffer through scalar iterators: PERF_N * 4 values, four per point
+        compute::make_buffer_iterator(input_points.get_buffer(), 0),
+        compute::make_buffer_iterator(input_points.get_buffer(), PERF_N * 4),
+        rng,
+        queue
+    );
+
+    // predicate which returns true if the point lies within the sphere
+    BOOST_COMPUTE_CLOSURE(bool, is_in_sphere, (float4_ point), (radius),
+    {
+        // ignore fourth component
+        point.w = 0;
+
+        return length(point) < radius;
+    });
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        compute::vector::iterator i = compute::copy_if(
+            input_points.begin(),
+            input_points.end(),
+            output_points.begin(),
+            is_in_sphere,
+            queue
+        );
+        queue.finish(); // finish the queue before stopping the timer
+        t.stop();
+
+        float ratio = float(std::distance(output_points.begin(), i)) / PERF_N; // fraction of points that were copied
+        if(PERF_N > 1000 && (ratio < 0.5f || ratio > 0.6f)){ // sphere-to-cube volume ratio is pi/6, about 0.52
+            std::cerr << "error: ratio is " << ratio << std::endl;
+            std::cerr << "error: ratio should be around 50-60%" << std::endl;
+        }
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+}
+
+int main(int argc, char *argv[]) // runs the "odd" copy_if benchmark on the default device
+{
+    perf_parse_args(argc, argv); // declared outside this file (presumably perf.hpp); assumed to set PERF_N / PERF_TRIALS -- TODO confirm
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    test_copy_if_odd(queue); // only this test runs; test_copy_if_in_sphere is defined above but not invoked
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_copy_to_device.cpp b/src/boost/libs/compute/perf/perf_copy_to_device.cpp
new file mode 100644
index 00000000..0cda3d0f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_copy_to_device.cpp
@@ -0,0 +1,55 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed
under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include + +int main(int argc, char *argv[]) +{ + size_t size = 1000; + if(argc >= 2){ + size = boost::lexical_cast(argv[1]); + } + + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + + boost::compute::command_queue::properties + properties = boost::compute::command_queue::enable_profiling; + boost::compute::command_queue queue(context, device, properties); + + std::vector host_vector(size); + std::generate(host_vector.begin(), host_vector.end(), rand); + + boost::compute::vector device_vector(host_vector.size(), context); + + boost::compute::future future = + boost::compute::copy_async(host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue); + + // wait for copy to finish + future.wait(); + + // get elapsed time in nanoseconds + size_t elapsed = + future.get_event().duration().count(); + + std::cout << "time: " << elapsed / 1e6 << " ms" << std::endl; + + float rate = (float(size * sizeof(int)) / elapsed) * 1000.f; + std::cout << "rate: " << rate << " MB/s" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_count.cpp b/src/boost/libs/compute/perf/perf_count.cpp new file mode 100644 index 00000000..a47eb615 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_count.cpp @@ -0,0 +1,77 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + size_t count = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + count = boost::compute::count( + device_vector.begin(), device_vector.end(), 4, queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "count: " << count << std::endl; + + // verify count is correct + size_t host_count = std::count(host_vector.begin(), + host_vector.end(), + 4); + if(count != host_count){ + std::cout << "ERROR: " + << "device_count (" << count << ") " + << "!= " + << "host_count (" << host_count << ")" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_discrete_distribution.cpp b/src/boost/libs/compute/perf/perf_discrete_distribution.cpp new file mode 100644 index 00000000..f6679eb1 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_discrete_distribution.cpp @@ -0,0 +1,48 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +namespace compute = boost::compute; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + compute::vector vector(PERF_N, context); + + int weights[] = {1, 1}; + + compute::default_random_engine rng(queue); + compute::discrete_distribution dist(weights, weights+2); + + perf_timer t; + t.start(); + dist.generate(vector.begin(), vector.end(), rng, queue); + queue.finish(); + t.stop(); + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_erase_remove.cpp b/src/boost/libs/compute/perf/perf_erase_remove.cpp new file mode 100644 index 00000000..f567247f --- /dev/null +++ b/src/boost/libs/compute/perf/perf_erase_remove.cpp @@ -0,0 +1,61 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 10.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + boost::compute::vector device_vector( + host_vector.begin(), host_vector.end(), queue + ); + + t.start(); + device_vector.erase( + boost::compute::remove( + device_vector.begin(), device_vector.end(), 4, queue + ), + device_vector.end(), + queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_exclusive_scan.cpp b/src/boost/libs/compute/perf/perf_exclusive_scan.cpp new file mode 100644 index 00000000..e6c65135 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_exclusive_scan.cpp @@ -0,0 +1,97 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Benoit +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::vector device_res(PERF_N,context); + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + // sum vector + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + t.start(); + boost::compute::exclusive_scan( + device_vector.begin(), + device_vector.end(), + device_res.begin(), + queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify sum is correct + std::partial_sum( + host_vector.begin(), + host_vector.end(), + host_vector.begin() + ); + + int device_sum = device_res.back(); + // when scan is exclusive values are shifted by one on the left + // compared to a inclusive scan + int host_sum = host_vector[host_vector.size()-2]; + + if(device_sum != host_sum){ + std::cout << "ERROR: " + << "device_sum (" << device_sum << ") " + << "!= " + << "host_sum (" << host_sum << ")" + << std::endl; + 
return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_fill.cpp b/src/boost/libs/compute/perf/perf_fill.cpp new file mode 100644 index 00000000..9f35b6b0 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_fill.cpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include + +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector on the device (filled with zeros) + boost::compute::vector vec(PERF_N, 0, queue); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::fill(vec.begin(), vec.end(), int(trial), queue); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_find.cpp b/src/boost/libs/compute/perf/perf_find.cpp new file mode 100644 index 00000000..3cfb75ad --- /dev/null +++ b/src/boost/libs/compute/perf/perf_find.cpp @@ -0,0 +1,88 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +// Max integer that can be generated by rand_int() function. +int rand_int_max = 25; + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * rand_int_max); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + // trying to find element that isn't in vector (worst-case scenario) + int wanted = rand_int_max + 1; + + // device iterator + boost::compute::vector::iterator device_result_it; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + device_result_it = boost::compute::find(device_vector.begin(), + device_vector.end(), + wanted, + queue); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify if found index is correct by comparing it with std::find() result + size_t host_result_index = std::distance(host_vector.begin(), + std::find(host_vector.begin(), + host_vector.end(), + wanted)); + size_t device_result_index = device_result_it.get_index(); + + 
if(device_result_index != host_result_index){ + std::cout << "ERROR: " + << "device_result_index (" << device_result_index << ") " + << "!= " + << "host_result_index (" << host_result_index << ")" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_find_end.cpp b/src/boost/libs/compute/perf/perf_find_end.cpp new file mode 100644 index 00000000..e20a7e8d --- /dev/null +++ b/src/boost/libs/compute/perf/perf_find_end.cpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + int pattern[] = {2, 6, 6, 7, 8, 4}; + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + boost::compute::vector pattern_vector(pattern, pattern + 6, queue); + + perf_timer t; + for(size_t trial 
= 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::find_end( + device_vector.begin(), device_vector.end(), + pattern_vector.begin(), pattern_vector.end(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_host_sort.cpp b/src/boost/libs/compute/perf/perf_host_sort.cpp new file mode 100644 index 00000000..d34b1c52 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_host_sort.cpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector random_vector(PERF_N); + std::generate(random_vector.begin(), random_vector.end(), rand); + + // create input vector for gpu + std::vector gpu_vector = random_vector; + + // sort vector on gpu + boost::timer::cpu_timer t; + boost::compute::sort( + gpu_vector.begin(), gpu_vector.end(), queue + ); + queue.finish(); + std::cout << "time: " << t.elapsed().wall / 1e6 << " ms" << std::endl; + + // create input vector for host + std::vector host_vector = 
random_vector; + + // sort vector on host + t.start(); + std::sort(host_vector.begin(), host_vector.end()); + std::cout << "host time: " << t.elapsed().wall / 1e6 << " ms" << std::endl; + + // ensure that both sorted vectors are equal + if(!std::equal(gpu_vector.begin(), gpu_vector.end(), host_vector.begin())){ + std::cerr << "ERROR: sorted vectors not the same" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_includes.cpp b/src/boost/libs/compute/perf/perf_includes.cpp new file mode 100644 index 00000000..0418a5d7 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_includes.cpp @@ -0,0 +1,68 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vectors of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + std::sort(host_vector.begin(), host_vector.end()); + + // create vectors on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + 
host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + boost::compute::vector device_vector2(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector2.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::includes( + device_vector.begin(), device_vector.end(), + device_vector2.begin(), device_vector2.end(), + queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_inner_product.cpp b/src/boost/libs/compute/perf/perf_inner_product.cpp new file mode 100644 index 00000000..112a4bc6 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_inner_product.cpp @@ -0,0 +1,74 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + std::vector h1(PERF_N); + std::vector h2(PERF_N); + std::generate(h1.begin(), h1.end(), rand_int); + std::generate(h2.begin(), h2.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector d1(PERF_N, context); + boost::compute::vector d2(PERF_N, context); + boost::compute::copy(h1.begin(), h1.end(), d1.begin(), queue); + boost::compute::copy(h2.begin(), h2.end(), d2.begin(), queue); + + int product = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + product = boost::compute::inner_product( + d1.begin(), d1.end(), d2.begin(), int(0), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify product is correct + int host_product = std::inner_product( + h1.begin(), h1.end(), h2.begin(), int(0) + ); + if(product != host_product){ + std::cout << "ERROR: " + << "device_product (" << product << ") " + << "!= " + << "host_product (" << host_product << ")" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_is_permutation.cpp b/src/boost/libs/compute/perf/perf_is_permutation.cpp new file mode 100644 index 00000000..39c72bf1 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_is_permutation.cpp @@ -0,0 +1,66 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + boost::compute::vector device_vector2(PERF_N, context); + boost::compute::copy( + host_vector.rbegin(), host_vector.rend(), device_vector2.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::is_permutation( + device_vector.begin(), device_vector.end(), + device_vector2.begin(), device_vector2.end(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_is_sorted.cpp b/src/boost/libs/compute/perf/perf_is_sorted.cpp new file mode 
100644 index 00000000..f16a3172 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_is_sorted.cpp @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + // sort and then reverse the random vector + boost::compute::sort(device_vector.begin(), device_vector.end(), queue); + boost::compute::reverse(device_vector.begin(), device_vector.end(), queue); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + bool sorted = boost::compute::is_sorted( + device_vector.begin(), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + if(sorted){ + std::cerr << "ERROR: is_sorted() returned true" << std::endl; + } + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; 
+} diff --git a/src/boost/libs/compute/perf/perf_max_element.cpp b/src/boost/libs/compute/perf/perf_max_element.cpp new file mode 100644 index 00000000..0e47c67e --- /dev/null +++ b/src/boost/libs/compute/perf/perf_max_element.cpp @@ -0,0 +1,93 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Rastko Anicic +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast(rand() % 10000000); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + boost::compute::vector::iterator device_max_iter + = device_vector.begin(); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + device_max_iter = boost::compute::max_element( + device_vector.begin(), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + } + + int device_max = device_max_iter.read(queue); + std::cout << "time: " << 
t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "max: " << device_max << std::endl; + + // verify max is correct + std::vector::iterator host_max_iter + = std::max_element(host_vector.begin(), host_vector.end()); + + int host_max = *host_max_iter; + if(device_max != host_max){ + std::cout << "ERROR: " + << "device_max (" << device_max << ") " + << "!= " + << "host_max (" << host_max << ")" + << std::endl; + return -1; + } + + size_t host_max_idx = std::distance(host_vector.begin(), host_max_iter); + size_t device_max_idx = std::distance(device_vector.begin(), device_max_iter); + if(device_max_idx != host_max_idx){ + std::cout << "ERROR: " + << "device_max index (" << device_max_idx << ") " + << "!= " + << "host_max index (" << host_max_idx << ")" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_merge.cpp b/src/boost/libs/compute/perf/perf_merge.cpp new file mode 100644 index 00000000..58ea836f --- /dev/null +++ b/src/boost/libs/compute/perf/perf_merge.cpp @@ -0,0 +1,69 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + std::vector v1 = generate_random_vector(std::floor(PERF_N / 2.0)); + std::vector v2 = generate_random_vector(std::ceil(PERF_N / 2.0)); + std::vector v3(PERF_N); + + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + boost::compute::vector gpu_v1(v1.begin(), v1.end(), queue); + boost::compute::vector gpu_v2(v2.begin(), v2.end(), queue); + boost::compute::vector gpu_v3(PERF_N, context); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::merge(gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin(), + queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + std::vector check_v3(PERF_N); + boost::compute::copy(gpu_v3.begin(), gpu_v3.end(), check_v3.begin(), queue); + queue.finish(); + + std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); + bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin()); + if(!ok){ + std::cerr << "ERROR: merged ranges different" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_next_permutation.cpp b/src/boost/libs/compute/perf/perf_next_permutation.cpp new file mode 100644 index 00000000..62f0bbb0 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_next_permutation.cpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 
Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + std::sort(host_vector.begin(), host_vector.end(), std::greater()); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::next_permutation( + device_vector.begin(), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + boost::compute::prev_permutation( + device_vector.begin(), device_vector.end(), queue + ); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_nth_element.cpp b/src/boost/libs/compute/perf/perf_nth_element.cpp new file mode 100644 index 00000000..07627cb4 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_nth_element.cpp @@ -0,0 +1,60 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::nth_element( + device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_partial_sum.cpp b/src/boost/libs/compute/perf/perf_partial_sum.cpp new file mode 100644 index 00000000..1c02b4c4 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_partial_sum.cpp @@ -0,0 +1,97 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + using boost::compute::int_; + + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::vector device_res(PERF_N,context); + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + // sum vector + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + t.start(); + boost::compute::partial_sum( + device_vector.begin(), + device_vector.end(), + device_res.begin(), + queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify sum is correct + std::partial_sum( + host_vector.begin(), + host_vector.end(), + 
host_vector.begin() + ); + + int device_sum = device_res.back(); + int host_sum = host_vector.back(); + + if(device_sum != host_sum){ + std::cout << "ERROR: " + << "device_sum (" << device_sum << ") " + << "!= " + << "host_sum (" << host_sum << ")" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_partition.cpp b/src/boost/libs/compute/perf/perf_partition.cpp new file mode 100644 index 00000000..b765d736 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_partition.cpp @@ -0,0 +1,66 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + using boost::compute::_1; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; 
trial++){ + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + t.start(); + boost::compute::partition( + device_vector.begin(), device_vector.end(), _1 < 10, queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_partition_point.cpp b/src/boost/libs/compute/perf/perf_partition_point.cpp new file mode 100644 index 00000000..91261aa1 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_partition_point.cpp @@ -0,0 +1,68 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + using 
boost::compute::_1; + boost::compute::partition( + device_vector.begin(), device_vector.end(), _1 < 20, queue + ); + + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::partition_point( + device_vector.begin(), device_vector.end(), _1 < 20, queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_prev_permutation.cpp b/src/boost/libs/compute/perf/perf_prev_permutation.cpp new file mode 100644 index 00000000..bb7d76d2 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_prev_permutation.cpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + std::sort(host_vector.begin(), host_vector.end()); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::prev_permutation( + device_vector.begin(), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + boost::compute::next_permutation( + device_vector.begin(), device_vector.end(), queue + ); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_random_number_engine.cpp b/src/boost/libs/compute/perf/perf_random_number_engine.cpp new file mode 100644 index 00000000..db25d437 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_random_number_engine.cpp @@ -0,0 +1,101 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include + +#include + +#include +#include +#include + +#include "perf.hpp" + +namespace compute = boost::compute; +namespace po = boost::program_options; + +template +void perf_random_number_engine(const size_t size, + const size_t trials, + compute::command_queue& queue) +{ + typedef typename Engine::result_type T; + + // create random number engine + Engine engine(queue); + + // create vector on the device + std::cout << "size = " << size << std::endl; + compute::vector vector(size, queue.get_context()); + + // generate random numbers + perf_timer t; + for(size_t i = 0; i < trials; i++){ + t.start(); + engine.generate(vector.begin(), vector.end(), queue); + queue.finish(); + t.stop(); + } + + // print result + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "rate: " << perf_rate(size, t.min_time()) << " MB/s" << std::endl; +} + +int main(int argc, char *argv[]) +{ + // setup and parse command line options + po::options_description options("options"); + options.add_options() + ("help", "show usage instructions") + ("size", po::value()->default_value(8192), "number of values") + ("trials", po::value()->default_value(3), "number of trials") + ("engine", po::value()->default_value("default_random_engine"), "random number engine") + ; + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, options), vm); + po::notify(vm); + + if(vm.count("help")) { + std::cout << options << std::endl; + return 0; + } + + // setup context and queue for the default device + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + // get command line options + const size_t size = vm["size"].as(); + const size_t trials = vm["trials"].as(); + const 
std::string& engine = vm["engine"].as(); + + // run benchmark + if(engine == "default_random_engine"){ + perf_random_number_engine(size, trials, queue); + } + else if(engine == "mersenne_twister_engine"){ + perf_random_number_engine(size, trials, queue); + } + else if(engine == "linear_congruential_engine"){ + perf_random_number_engine >(size, trials, queue); + } + else if(engine == "threefry_engine"){ + perf_random_number_engine >(size, trials, queue); + } + else { + std::cerr << "error: unknown random number engine '" << engine << "'" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_reduce_by_key.cpp b/src/boost/libs/compute/perf/perf_reduce_by_key.cpp new file mode 100644 index 00000000..c88d450e --- /dev/null +++ b/src/boost/libs/compute/perf/perf_reduce_by_key.cpp @@ -0,0 +1,114 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +struct unique_key { + int current; + int avgValuesNoPerKey; + + unique_key() + { + current = 0; + avgValuesNoPerKey = 512; + } + + int operator()() + { + double p = double(1.0) / static_cast(avgValuesNoPerKey); + if((rand() / double(RAND_MAX)) <= p) + return ++current; + return current; + } +} UniqueKey; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of keys and random values + std::vector host_keys(PERF_N); + std::vector host_values(PERF_N); + std::generate(host_keys.begin(), host_keys.end(), UniqueKey); + std::generate(host_values.begin(), host_values.end(), rand_int); + + // create vectors for keys and values on the device and copy the data + boost::compute::vector device_keys(PERF_N, context); + boost::compute::vector device_values(PERF_N,context); + boost::compute::copy( + host_keys.begin(), + host_keys.end(), + device_keys.begin(), + queue + ); + boost::compute::copy( + host_values.begin(), + host_values.end(), + device_values.begin(), + queue + ); + + // vectors for the results + boost::compute::vector device_keys_results(PERF_N, context); + boost::compute::vector device_values_results(PERF_N,context); + + typedef boost::compute::vector::iterator iterType; + std::pair result( + device_keys_results.begin(), + device_values_results.begin() + ); + + // reduce by key + perf_timer t; + for(size_t trial = 0; trial < 
PERF_TRIALS; trial++){ + t.start(); + result = boost::compute::reduce_by_key(device_keys.begin(), + device_keys.end(), + device_values.begin(), + device_keys_results.begin(), + device_values_results.begin(), + queue); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + size_t result_size = std::distance(device_keys_results.begin(), result.first); + if(result_size != static_cast(host_keys[PERF_N-1] + 1)){ + std::cout << "ERROR: " + << "wrong number of keys" << result_size << "\n" << (host_keys[PERF_N-1] + 1) + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_reverse.cpp b/src/boost/libs/compute/perf/perf_reverse.cpp new file mode 100644 index 00000000..64369a78 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_reverse.cpp @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::reverse( + device_vector.begin(), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_reverse_copy.cpp b/src/boost/libs/compute/perf/perf_reverse_copy.cpp new file mode 100644 index 00000000..5ce01c9b --- /dev/null +++ b/src/boost/libs/compute/perf/perf_reverse_copy.cpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + // create vector on the device for reversed data + boost::compute::vector device_reversed_vector(PERF_N, context); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::reverse_copy( + device_vector.begin(), device_vector.end(), + device_reversed_vector.begin(), + queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_rotate.cpp b/src/boost/libs/compute/perf/perf_rotate.cpp new file mode 100644 index 00000000..49f85d57 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_rotate.cpp @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See 
http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::rotate( + device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_rotate_copy.cpp b/src/boost/libs/compute/perf/perf_rotate_copy.cpp new file mode 100644 index 00000000..97111ef5 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_rotate_copy.cpp @@ -0,0 +1,62 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + boost::compute::vector device_vector2(PERF_N, context); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::rotate_copy( + device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), device_vector2.begin(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_saxpy.cpp b/src/boost/libs/compute/perf/perf_saxpy.cpp new file mode 100644 index 00000000..99ffc55d --- /dev/null +++ b/src/boost/libs/compute/perf/perf_saxpy.cpp @@ -0,0 +1,162 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for 
more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +namespace po = boost::program_options; +namespace compute = boost::compute; + +float rand_float() +{ + return (float(rand()) / float(RAND_MAX)) * 1000.f; +} + +template +double perf_saxpy(const compute::vector& x, + const compute::vector& y, + const T alpha, + const size_t trials, + compute::command_queue& queue) +{ + // create vector on the device to store the result + compute::vector result(x.size(), queue.get_context()); + + perf_timer t; + for(size_t trial = 0; trial < trials; trial++){ + compute::fill(result.begin(), result.end(), T(0), queue); + queue.finish(); + + t.start(); + + using compute::lambda::_1; + using compute::lambda::_2; + + compute::transform( + x.begin(), x.end(), y.begin(), result.begin(), alpha * _1 + _2, queue + ); + + queue.finish(); + t.stop(); + } + + return t.min_time(); +} + +template +void tune_saxpy(const compute::vector& x, + const compute::vector& y, + const T alpha, + const size_t trials, + compute::command_queue& queue) +{ + boost::shared_ptr + params = compute::detail::parameter_cache::get_global_cache(queue.get_device()); + + const std::string cache_key = + std::string("__boost_copy_kernel_") + boost::lexical_cast(sizeof(T)); + + const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 }; + const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + + double min_time = (std::numeric_limits::max)(); + compute::uint_ best_tpb = 0; + compute::uint_ best_vpt = 0; + + for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){ + params->set(cache_key, "tpb", tpbs[i]); + for(size_t j = 0; j < sizeof(vpts) / sizeof(*vpts); j++){ + params->set(cache_key, "vpt", vpts[j]); + + try { + const double t = perf_saxpy(x, y, alpha, trials, queue); + if(t < min_time){ + best_tpb = 
tpbs[i]; + best_vpt = vpts[j]; + min_time = t; + } + } + catch(compute::opencl_error&){ + // invalid parameters for this device, skip + } + } + } + + // store optimal parameters + params->set(cache_key, "tpb", best_tpb); + params->set(cache_key, "vpt", best_vpt); +} + +int main(int argc, char *argv[]) +{ + // setup command line arguments + po::options_description options("options"); + options.add_options() + ("help", "show usage instructions") + ("size", po::value()->default_value(8192), "input size") + ("trials", po::value()->default_value(3), "number of trials to run") + ("tune", "run tuning procedure") + ("alpha", po::value()->default_value(2.5), "saxpy alpha value") + ; + po::positional_options_description positional_options; + positional_options.add("size", 1); + + // parse command line + po::variables_map vm; + po::store( + po::command_line_parser(argc, argv) + .options(options).positional(positional_options).run(), + vm + ); + po::notify(vm); + + const size_t size = vm["size"].as(); + const size_t trials = vm["trials"].as(); + const float alpha = vm["alpha"].as(); + std::cout << "size: " << size << std::endl; + + // setup context and queue for the default device + compute::device device = boost::compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_x(size); + std::vector host_y(size); + std::generate(host_x.begin(), host_x.end(), rand_float); + std::generate(host_y.begin(), host_y.end(), rand_float); + + // create vector on the device and copy the data + compute::vector x(host_x.begin(), host_x.end(), queue); + compute::vector y(host_y.begin(), host_y.end(), queue); + + // run tuning proceure (if requested) + if(vm.count("tune")){ + tune_saxpy(x, y, alpha, trials, queue); + } + + // run benchmark + double t = perf_saxpy(x, y, alpha, trials, queue); + std::cout << 
"time: " << t / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_search.cpp b/src/boost/libs/compute/perf/perf_search.cpp new file mode 100644 index 00000000..b76e9755 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_search.cpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + int pattern[] = {2, 6, 6, 7, 8, 4}; + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + boost::compute::vector pattern_vector(pattern, pattern + 6, queue); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::search( + device_vector.begin(), device_vector.end(), + pattern_vector.begin(), pattern_vector.end(), queue + ); + 
queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_search_n.cpp b/src/boost/libs/compute/perf/perf_search_n.cpp new file mode 100644 index 00000000..31fa0adb --- /dev/null +++ b/src/boost/libs/compute/perf/perf_search_n.cpp @@ -0,0 +1,61 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::search_n( + device_vector.begin(), device_vector.end(), + 5, 2, queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << 
std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_set_difference.cpp b/src/boost/libs/compute/perf/perf_set_difference.cpp new file mode 100644 index 00000000..b1b28573 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_set_difference.cpp @@ -0,0 +1,75 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vectors of random numbers on the host + std::vector v1(std::floor(PERF_N / 2.0)); + std::vector v2(std::ceil(PERF_N / 2.0)); + std::generate(v1.begin(), v1.end(), rand_int); + std::generate(v2.begin(), v2.end(), rand_int); + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + // create vectors on the device and copy the data + boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); + boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); + + boost::compute::copy( + v1.begin(), v1.end(), gpu_v1.begin(), queue + ); + boost::compute::copy( + v2.begin(), v2.end(), gpu_v2.begin(), queue + ); + + boost::compute::vector gpu_v3(PERF_N, context); + 
boost::compute::vector::iterator gpu_v3_end; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + gpu_v3_end = boost::compute::set_difference( + gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_set_intersection.cpp b/src/boost/libs/compute/perf/perf_set_intersection.cpp new file mode 100644 index 00000000..dbfeb42f --- /dev/null +++ b/src/boost/libs/compute/perf/perf_set_intersection.cpp @@ -0,0 +1,75 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vectors of random numbers on the host + std::vector v1(std::floor(PERF_N / 2.0)); + std::vector v2(std::ceil(PERF_N / 2.0)); + std::generate(v1.begin(), v1.end(), rand_int); + std::generate(v2.begin(), v2.end(), rand_int); + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + // create vectors on the device and copy the data + boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); + boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); + + boost::compute::copy( + v1.begin(), v1.end(), gpu_v1.begin(), queue + ); + boost::compute::copy( + v2.begin(), v2.end(), gpu_v2.begin(), queue + ); + + boost::compute::vector gpu_v3(PERF_N, context); + boost::compute::vector::iterator gpu_v3_end; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + gpu_v3_end = boost::compute::set_intersection( + gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp b/src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp new file mode 
100644 index 00000000..9449c585 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp @@ -0,0 +1,75 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vectors of random numbers on the host + std::vector v1(std::floor(PERF_N / 2.0)); + std::vector v2(std::ceil(PERF_N / 2.0)); + std::generate(v1.begin(), v1.end(), rand_int); + std::generate(v2.begin(), v2.end(), rand_int); + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + // create vectors on the device and copy the data + boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), context); + boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); + + boost::compute::copy( + v1.begin(), v1.end(), gpu_v1.begin(), queue + ); + boost::compute::copy( + v2.begin(), v2.end(), gpu_v2.begin(), queue + ); + + boost::compute::vector gpu_v3(PERF_N, context); + boost::compute::vector::iterator gpu_v3_end; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + gpu_v3_end = 
boost::compute::set_symmetric_difference( + gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_set_union.cpp b/src/boost/libs/compute/perf/perf_set_union.cpp new file mode 100644 index 00000000..3a336cb6 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_set_union.cpp @@ -0,0 +1,75 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vectors of random numbers on the host + std::vector v1(std::floor(PERF_N / 2.0)); + std::vector v2(std::ceil(PERF_N / 2.0)); + std::generate(v1.begin(), v1.end(), rand_int); + std::generate(v2.begin(), v2.end(), rand_int); + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + // create vectors on the device and copy the data + boost::compute::vector gpu_v1(std::floor(PERF_N / 2.0), 
context); + boost::compute::vector gpu_v2(std::ceil(PERF_N / 2.0), context); + + boost::compute::copy( + v1.begin(), v1.end(), gpu_v1.begin(), queue + ); + boost::compute::copy( + v2.begin(), v2.end(), gpu_v2.begin(), queue + ); + + boost::compute::vector gpu_v3(PERF_N, context); + boost::compute::vector::iterator gpu_v3_end; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + gpu_v3_end = boost::compute::set_union( + gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_sort.cpp b/src/boost/libs/compute/perf/perf_sort.cpp new file mode 100644 index 00000000..458203ed --- /dev/null +++ b/src/boost/libs/compute/perf/perf_sort.cpp @@ -0,0 +1,130 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +namespace po = boost::program_options; +namespace compute = boost::compute; + +template +double perf_sort(const std::vector& data, + const size_t trials, + compute::command_queue& queue) +{ + compute::vector vec(data.size(), queue.get_context()); + + perf_timer t; + for(size_t trial = 0; trial < trials; trial++){ + compute::copy(data.begin(), data.end(), vec.begin(), queue); + t.start(); + compute::sort(vec.begin(), vec.end(), queue); + queue.finish(); + t.stop(); + + if(!compute::is_sorted(vec.begin(), vec.end(), queue)){ + std::cerr << "ERROR: is_sorted() returned false" << std::endl; + } + } + return t.min_time(); +} + +template +void tune_sort(const std::vector& data, + const size_t trials, + compute::command_queue& queue) +{ + boost::shared_ptr + params = compute::detail::parameter_cache::get_global_cache(queue.get_device()); + + const std::string cache_key = + std::string("__boost_radix_sort_") + compute::type_name(); + + const compute::uint_ tpbs[] = { 32, 64, 128, 256, 512, 1024 }; + + double min_time = (std::numeric_limits::max)(); + compute::uint_ best_tpb = 0; + + for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){ + params->set(cache_key, "tpb", tpbs[i]); + + try { + const double t = perf_sort(data, trials, queue); + if(t < min_time){ + best_tpb = tpbs[i]; + min_time = t; + } + } + catch(compute::opencl_error&){ + // invalid work group size for this device, skip + } + } + + // store optimal parameters + params->set(cache_key, "tpb", best_tpb); +} + +int main(int argc, char *argv[]) +{ + // setup command line arguments + po::options_description options("options"); + options.add_options() + ("help", "show usage instructions") + ("size", po::value()->default_value(8192), "input size") + ("trials", po::value()->default_value(3), "number of trials to run") + 
("tune", "run tuning procedure") + ; + po::positional_options_description positional_options; + positional_options.add("size", 1); + + // parse command line + po::variables_map vm; + po::store( + po::command_line_parser(argc, argv) + .options(options).positional(positional_options).run(), + vm + ); + po::notify(vm); + + const size_t size = vm["size"].as(); + const size_t trials = vm["trials"].as(); + std::cout << "size: " << size << std::endl; + + // setup context and queue for the default device + compute::device device = boost::compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector data(size); + std::generate(data.begin(), data.end(), rand); + + // run tuning proceure (if requested) + if(vm.count("tune")){ + tune_sort(data, trials, queue); + } + + // run sort benchmark + double t = perf_sort(data, trials, queue); + std::cout << "time: " << t / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_sort_by_key.cpp b/src/boost/libs/compute/perf/perf_sort_by_key.cpp new file mode 100644 index 00000000..57c3fc83 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_sort_by_key.cpp @@ -0,0 +1,79 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + using boost::compute::int_; + using boost::compute::long_; + + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_keys(PERF_N); + std::generate(host_keys.begin(), host_keys.end(), rand); + std::vector host_values(PERF_N); + std::copy(host_keys.begin(), host_keys.end(), host_values.begin()); + + // create vector on the device and copy the data + boost::compute::vector device_keys(PERF_N, context); + boost::compute::vector device_values(PERF_N, context); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + boost::compute::copy( + host_keys.begin(), host_keys.end(), device_keys.begin(), queue + ); + boost::compute::copy( + host_values.begin(), host_values.end(), device_values.begin(), queue + ); + + t.start(); + // sort vector + boost::compute::sort_by_key( + device_keys.begin(), device_keys.end(), device_values.begin(), queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify keys are sorted + if(!boost::compute::is_sorted(device_keys.begin(), device_keys.end(), queue)){ + std::cout << "ERROR: is_sorted() returned false for the keys" << std::endl; + return -1; + } + // verify values are sorted + if(!boost::compute::is_sorted(device_values.begin(), device_values.end(), queue)){ + std::cout << "ERROR: is_sorted() returned false for the values" << std::endl; + return -1; + } + 
+ return 0; +} diff --git a/src/boost/libs/compute/perf/perf_sort_float.cpp b/src/boost/libs/compute/perf/perf_sort_float.cpp new file mode 100644 index 00000000..1b2d5f5e --- /dev/null +++ b/src/boost/libs/compute/perf/perf_sort_float.cpp @@ -0,0 +1,72 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +float rand_float() +{ + return ((rand() / float(RAND_MAX)) - 0.5f) * 100000.0f; +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_float); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), + host_vector.end(), + device_vector.begin(), + queue + ); + + // sort vector + perf_timer t; + t.start(); + boost::compute::sort( + device_vector.begin(), + device_vector.end(), + queue + ); + queue.finish(); + t.stop(); + std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl; + + // verify vector is sorted + if(!boost::compute::is_sorted(device_vector.begin(), + device_vector.end(), + queue)){ + 
std::cout << "ERROR: is_sorted() returned false" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stable_partition.cpp b/src/boost/libs/compute/perf/perf_stable_partition.cpp new file mode 100644 index 00000000..f7ef1063 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stable_partition.cpp @@ -0,0 +1,62 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // setup context and queue for the default device + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + std::cout << "device: " << device.name() << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // create vector on the device and copy the data + boost::compute::vector device_vector(PERF_N, context); + boost::compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + using boost::compute::_1; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + boost::compute::stable_partition( + device_vector.begin(), device_vector.end(), _1 < 10, queue + ); + queue.finish(); + t.stop(); + } + std::cout << "time: " 
<< t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_accumulate.cpp b/src/boost/libs/compute/perf/perf_stl_accumulate.cpp new file mode 100644 index 00000000..c28d2d03 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_accumulate.cpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + int sum = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "sum: " << sum << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_count.cpp b/src/boost/libs/compute/perf/perf_stl_count.cpp new file mode 100644 index 00000000..9df13f86 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_count.cpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // count values equal to four in the vector + size_t count = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + count = std::count( + host_vector.begin(), host_vector.end(), 4 + ); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "count: " << count << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_find.cpp b/src/boost/libs/compute/perf/perf_stl_find.cpp new file mode 100644 index 00000000..b5d3eed2 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_find.cpp @@ -0,0 +1,58 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include "perf.hpp" + +// Max integer that can be generated by rand_int() function. 
+int rand_int_max = 25; + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * rand_int_max); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // trying to find element that isn't in vector (worst-case scenario) + int wanted = rand_int_max + 1; + + // result + std::vector::iterator host_result_it; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify + if(host_result_it != host_vector.end()){ + std::cout << "ERROR: " + << "host_result_iterator != " + << "host_vector.end()" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_find_end.cpp b/src/boost/libs/compute/perf/perf_stl_find_end.cpp new file mode 100644 index 00000000..cb1233b1 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_find_end.cpp @@ -0,0 +1,44 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + int pattern[] = {2, 6, 6, 7, 8, 4}; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + std::find_end(host_vector.begin(), host_vector.end(), + pattern, pattern + 6); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_includes.cpp b/src/boost/libs/compute/perf/perf_stl_includes.cpp new file mode 100644 index 00000000..b71dbf3c --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_includes.cpp @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + std::vector v1(PERF_N); + std::generate(v1.begin(), v1.end(), rand_int); + + std::vector v2(v1); + + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + std::includes( + v1.begin(), v1.end(), + v2.begin(), v2.end() + ); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_inner_product.cpp b/src/boost/libs/compute/perf/perf_stl_inner_product.cpp new file mode 100644 index 00000000..884f06c9 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_inner_product.cpp @@ -0,0 +1,46 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + std::vector h1(PERF_N); + std::vector h2(PERF_N); + std::generate(h1.begin(), h1.end(), rand_int); + std::generate(h2.begin(), h2.end(), rand_int); + + int product = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + product = std::inner_product( + h1.begin(), h1.end(), h2.begin(), int(0) + ); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "product: " << product << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_is_permutation.cpp b/src/boost/libs/compute/perf/perf_stl_is_permutation.cpp new file mode 100644 index 00000000..1384e8e9 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_is_permutation.cpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + std::vector host_vector(PERF_N); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + std::vector host_vector2(PERF_N); + std::copy(host_vector.rbegin(), host_vector.rend(), host_vector2.begin()); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + std::is_permutation(host_vector.begin(), host_vector.end(), + host_vector2.begin()); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_stl_max_element.cpp b/src/boost/libs/compute/perf/perf_stl_max_element.cpp new file mode 100644 index 00000000..3fa61267 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_stl_max_element.cpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Rastko Anicic +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: rand() reduced modulo 10000000, returned as int.
+int rand_int()
+{
+    return static_cast(rand() % 10000000);
+}
+
+// Benchmarks std::max_element on a PERF_N-element random vector. Prints
+// the size, t.min_time() / 1e6 labelled "ms", and the maximum found by the
+// last trial.
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    int max = 0;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        // the returned iterator is dereferenced without an end() check
+        max = *(std::max_element(host_vector.begin(), host_vector.end()));
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "max: " << max << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_merge.cpp b/src/boost/libs/compute/perf/perf_stl_merge.cpp
new file mode 100644
index 00000000..0a842a04
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_merge.cpp
@@ -0,0 +1,38 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template arguments are
+// absent in this copy (angle-bracket text looks stripped); restore them
+// from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Benchmarks std::merge of two sorted vectors into a PERF_N-element
+// output. Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS, perf_timer and
+// generate_random_vector are presumably declared in perf.hpp; dividing
+// min_time() by 1e6 for "ms" suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+    // floor and ceil of PERF_N / 2.0 add up to PERF_N, so v3 is exactly
+    // large enough for the merged output
+    std::vector v1 = generate_random_vector(std::floor(PERF_N / 2.0));
+    std::vector v2 = generate_random_vector(std::ceil(PERF_N / 2.0));
+    std::vector v3(PERF_N);
+
+    // std::merge expects both inputs to be sorted
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_next_permutation.cpp b/src/boost/libs/compute/perf/perf_stl_next_permutation.cpp
new file mode 100644
index 00000000..22148975
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_next_permutation.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::next_permutation on a vector sorted with std::greater.
+// Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+    std::sort(host_vector.begin(), host_vector.end(), std::greater());
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        std::next_permutation(host_vector.begin(), host_vector.end());
+        t.stop();
+        // untimed: step back so every trial starts from the same ordering
+        std::prev_permutation(host_vector.begin(), host_vector.end());
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_partial_sum.cpp b/src/boost/libs/compute/perf/perf_stl_partial_sum.cpp
new file mode 100644
index 00000000..533defb2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_partial_sum.cpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::partial_sum from v into r, both of PERF_N elements.
+// Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    // NOTE(review): int_ is presumably the stripped element type of the
+    // vectors below -- confirm against upstream.
+    using boost::compute::int_;
+
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector v(PERF_N);
+    std::vector r(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // input is refilled before start(), so generation is not timed
+        std::generate(v.begin(), v.end(), rand_int);
+        t.start();
+        std::partial_sum(
+            v.begin(),
+            v.end(),
+            r.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_partition.cpp b/src/boost/libs/compute/perf/perf_stl_partition.cpp
new file mode 100644
index 00000000..56aadd0d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_partition.cpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Partition predicate: true for values below 10.
+bool less_than_10(int value)
+{
+    return value < 10;
+}
+
+// Benchmarks std::partition with less_than_10 on a PERF_N-element random
+// vector. Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // NOTE(review): the input is generated once and partitioned in
+        // place, so trials after the first run on already-partitioned data.
+        t.start();
+        std::partition(host_vector.begin(), host_vector.end(), less_than_10);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_partition_point.cpp b/src/boost/libs/compute/perf/perf_stl_partition_point.cpp
new file mode 100644
index 00000000..94b1c263
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_partition_point.cpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Partition predicate: true for values below 20.
+bool less_than_20(int value)
+{
+    return value < 20;
+}
+
+// Benchmarks std::partition_point with less_than_20 on a vector that was
+// partitioned by the same predicate beforehand. Prints the size and
+// t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+    // untimed setup: partition_point needs an already partitioned range
+    std::partition(host_vector.begin(), host_vector.end(),
+                   less_than_20);
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        // the returned iterator is discarded; only the time is reported
+        std::partition_point(host_vector.begin(), host_vector.end(),
+                             less_than_20);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp b/src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp
new file mode 100644
index 00000000..f246ba2a
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::prev_permutation on a vector sorted in ascending order.
+// Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+    std::sort(host_vector.begin(), host_vector.end());
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        std::prev_permutation(host_vector.begin(), host_vector.end());
+        t.stop();
+        // untimed: step forward so every trial starts from the same ordering
+        std::next_permutation(host_vector.begin(), host_vector.end());
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_reverse.cpp b/src/boost/libs/compute/perf/perf_stl_reverse.cpp
new file mode 100644
index 00000000..b2ee2ef6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_reverse.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks in-place std::reverse on a PERF_N-element random vector.
+// Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // the same vector is reversed again on every trial
+        t.start();
+        std::reverse(host_vector.begin(), host_vector.end());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp b/src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp
new file mode 100644
index 00000000..1397e9a7
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::reverse_copy from a random vector into a second vector
+// of the same size. Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create vector for reversed data
+    std::vector host_reversed_vector(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // the source is never modified, so every trial sees the same input
+        t.start();
+        std::reverse_copy(host_vector.begin(), host_vector.end(),
+                          host_reversed_vector.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_rotate.cpp b/src/boost/libs/compute/perf/perf_stl_rotate.cpp
new file mode 100644
index 00000000..f90acef2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_rotate.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks in-place std::rotate about the element at index PERF_N/2.
+// Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // the same vector is rotated again on every trial
+        t.start();
+        std::rotate(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp b/src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp
new file mode 100644
index 00000000..516d11c2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::rotate_copy about the element at index PERF_N/2, writing
+// into a second vector of the same size. Prints the size and
+// t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // destination for the rotated copy
+    std::vector host_vector2(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // the source is never modified, so every trial sees the same input
+        t.start();
+        std::rotate_copy(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end(), host_vector2.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_saxpy.cpp b/src/boost/libs/compute/perf/perf_stl_saxpy.cpp
new file mode 100644
index 00000000..8ab33535
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_saxpy.cpp
@@ -0,0 +1,52 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template arguments are
+// absent in this copy (angle-bracket text looks stripped); restore them
+// from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 1000 / RAND_MAX in float arithmetic,
+// giving a value in [0, 1000].
+float rand_float()
+{
+    return (float(rand()) / float(RAND_MAX)) * 1000.f;
+}
+
+// y <- alpha * x + y
+// Single-threaded loop over the first n elements of x and y; y is updated
+// in place and x is only read.
+void serial_saxpy(size_t n, float alpha, const float *x, float *y)
+{
+    for(size_t i = 0; i < n; i++){
+        y[i] = alpha * x[i] + y[i];
+    }
+}
+
+// Benchmarks serial_saxpy with alpha = 2.5 on two PERF_N-element random
+// vectors. Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    float alpha = 2.5f;
+
+    std::vector host_x(PERF_N);
+    std::vector host_y(PERF_N);
+    std::generate(host_x.begin(), host_x.end(), rand_float);
+    std::generate(host_y.begin(), host_y.end(), rand_float);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // host_y is not reset between trials, so its values keep growing
+        t.start();
+        serial_saxpy(PERF_N, alpha, &host_x[0], &host_y[0]);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_search.cpp b/src/boost/libs/compute/perf/perf_stl_search.cpp
new file mode 100644
index 00000000..8166d35e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_search.cpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::search for a fixed six-element pattern in a
+// PERF_N-element random vector. Prints the size and t.min_time() / 1e6
+// labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // the literal 6 in "pattern + 6" below must match this array's length
+    int pattern[] = {2, 6, 6, 7, 8, 4};
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        // the returned iterator is discarded; only the time is reported
+        std::search(host_vector.begin(), host_vector.end(),
+                    pattern, pattern + 6);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_search_n.cpp b/src/boost/libs/compute/perf/perf_stl_search_n.cpp
new file mode 100644
index 00000000..76a6bb07
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_search_n.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::search_n on a PERF_N-element random vector. Prints the
+// size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        // looks for a run of 5 consecutive elements equal to 2; the
+        // returned iterator is discarded
+        std::search_n(host_vector.begin(), host_vector.end(), 5, 2);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_difference.cpp b/src/boost/libs/compute/perf/perf_stl_set_difference.cpp
new file mode 100644
index 00000000..c5d0802d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_difference.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::set_difference on two sorted random vectors. Prints the
+// input size, t.min_time() / 1e6 labelled "ms", and the number of elements
+// written by the last trial (also under the label "size: ").
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // floor and ceil of PERF_N / 2.0 add up to PERF_N
+    std::vector v1(std::floor(PERF_N / 2.0));
+    std::vector v2(std::ceil(PERF_N / 2.0));
+
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted input ranges
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // output buffer sized for the combined length of both inputs
+    std::vector v3(PERF_N);
+    std::vector::iterator v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_difference(
+            v1.begin(), v1.end(),
+            v2.begin(), v2.end(),
+            v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_intersection.cpp b/src/boost/libs/compute/perf/perf_stl_set_intersection.cpp
new file mode 100644
index 00000000..6aa3493b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_intersection.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::set_intersection on two sorted random vectors. Prints
+// the input size, t.min_time() / 1e6 labelled "ms", and the number of
+// elements written by the last trial (also under the label "size: ").
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // floor and ceil of PERF_N / 2.0 add up to PERF_N
+    std::vector v1(std::floor(PERF_N / 2.0));
+    std::vector v2(std::ceil(PERF_N / 2.0));
+
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted input ranges
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // output buffer sized for the combined length of both inputs
+    std::vector v3(PERF_N);
+    std::vector::iterator v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_intersection(
+            v1.begin(), v1.end(),
+            v2.begin(), v2.end(),
+            v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp b/src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp
new file mode 100644
index 00000000..c22c70a7
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::set_symmetric_difference on two sorted random vectors.
+// Prints the input size, t.min_time() / 1e6 labelled "ms", and the number
+// of elements written by the last trial (also under the label "size: ").
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // floor and ceil of PERF_N / 2.0 add up to PERF_N
+    std::vector v1(std::floor(PERF_N / 2.0));
+    std::vector v2(std::ceil(PERF_N / 2.0));
+
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted input ranges
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // output buffer sized for the combined length of both inputs
+    std::vector v3(PERF_N);
+    std::vector::iterator v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_symmetric_difference(
+            v1.begin(), v1.end(),
+            v2.begin(), v2.end(),
+            v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_union.cpp b/src/boost/libs/compute/perf/perf_stl_set_union.cpp
new file mode 100644
index 00000000..9d4fe3a8
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_union.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::set_union on two sorted random vectors. Prints the input
+// size, t.min_time() / 1e6 labelled "ms", and the number of elements
+// written by the last trial (also under the label "size: ").
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // floor and ceil of PERF_N / 2.0 add up to PERF_N
+    std::vector v1(std::floor(PERF_N / 2.0));
+    std::vector v2(std::ceil(PERF_N / 2.0));
+
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted input ranges
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // output buffer sized for the combined length of both inputs
+    std::vector v3(PERF_N);
+    std::vector::iterator v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_union(
+            v1.begin(), v1.end(),
+            v2.begin(), v2.end(),
+            v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_sort.cpp b/src/boost/libs/compute/perf/perf_stl_sort.cpp
new file mode 100644
index 00000000..c9d4294b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_sort.cpp
@@ -0,0 +1,33 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template arguments are
+// absent in this copy (angle-bracket text looks stripped); restore them
+// from upstream before compiling.
+#include
+#include
+
+#include "perf.hpp"
+
+// Benchmarks std::sort on a freshly generated PERF_N-element vector per
+// trial. Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS, perf_timer and
+// generate_random_vector are presumably declared in perf.hpp; dividing
+// min_time() by 1e6 for "ms" suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    std::vector v;
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // regenerate before start() so every trial sorts new data and the
+        // generation itself is not timed
+        v = generate_random_vector(PERF_N);
+        t.start();
+        std::sort(v.begin(), v.end());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_stable_partition.cpp b/src/boost/libs/compute/perf/perf_stl_stable_partition.cpp
new file mode 100644
index 00000000..ee4993cc
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_stable_partition.cpp
@@ -0,0 +1,47 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Partition predicate: true for values below 10.
+bool less_than_10(int value)
+{
+    return value < 10;
+}
+
+// Benchmarks std::stable_partition with less_than_10 on a PERF_N-element
+// random vector. Prints the size and t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // NOTE(review): the input is generated once and partitioned in
+        // place, so trials after the first run on already-partitioned data.
+        t.start();
+        std::stable_partition(host_vector.begin(), host_vector.end(),
+                              less_than_10);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_unique.cpp b/src/boost/libs/compute/perf/perf_stl_unique.cpp
new file mode 100644
index 00000000..f4f97b4b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_unique.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks in-place std::unique on a PERF_N-element vector that is
+// refilled before every trial. Prints the size and t.min_time() / 1e6
+// labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // refill before start(): unique modifies the vector, and the
+        // generation itself is not timed
+        std::generate(host_vector.begin(), host_vector.end(), rand_int);
+        t.start();
+        // the returned iterator is discarded; the vector is not shrunk
+        std::unique(host_vector.begin(), host_vector.end());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_unique_copy.cpp b/src/boost/libs/compute/perf/perf_stl_unique_copy.cpp
new file mode 100644
index 00000000..77705fa2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_unique_copy.cpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmarks std::unique_copy from a vector refilled before every trial
+// into a second vector of the same size. Prints the size and
+// t.min_time() / 1e6 labelled "ms".
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::vector host_vector2(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // input is refilled before start(), so generation is not timed
+        std::generate(host_vector.begin(), host_vector.end(), rand_int);
+        t.start();
+        // the returned iterator is discarded; only the time is reported
+        std::unique_copy(
+            host_vector.begin(), host_vector.end(), host_vector2.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_tbb_accumulate.cpp b/src/boost/libs/compute/perf/perf_tbb_accumulate.cpp
new file mode 100644
index 00000000..319ad8af
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_tbb_accumulate.cpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): header names after #include and template/cast arguments
+// are absent in this copy (angle-bracket text looks stripped); restore
+// them from upstream before compiling.
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "perf.hpp"
+
+// Input generator: scales rand() by 25.0 / RAND_MAX, giving a value in
+// [0, 25] that is returned as int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Reduction body passed to tbb::parallel_reduce: accumulates the elements
+// of each sub-range it is handed into `value`.
+template
+struct Sum {
+    T value;
+    Sum() : value(0) {}
+    // splitting constructor: the new body starts from zero and ignores s
+    Sum( Sum& s, tbb::split ) {value = 0;}
+    // adds every element of r on top of what this body already holds
+    void operator()( const tbb::blocked_range& r ) {
+        // accumulate in a local and store once at the end
+        T temp = value;
+        for( T* a=r.begin(); a!=r.end(); ++a ) {
+            temp += *a;
+        }
+        value = temp;
+    }
+    // folds the partial sum of another body into this one
+    void join( Sum& rhs ) {value += rhs.value;}
+};
+
+// Sums the n elements starting at array with tbb::parallel_reduce and
+// returns the total.
+template
+T ParallelSum( T array[], size_t n ) {
+    Sum total;
+    tbb::parallel_reduce( tbb::blocked_range( array, array+n ),
+                          total );
+    return total.value;
+}
+
+// Benchmarks ParallelSum on a PERF_N-element random vector. Prints the
+// size, t.min_time() / 1e6 labelled "ms", and the sum, then checks the sum
+// against std::accumulate. Returns -1 on a mismatch, 0 otherwise.
+// NOTE(review): perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are
+// presumably declared in perf.hpp; dividing min_time() by 1e6 for "ms"
+// suggests nanoseconds -- confirm.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    int sum = 0;
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        sum = ParallelSum(&host_vector[0], host_vector.size());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "sum: " << sum << std::endl;
+
+    // untimed serial reference used to validate the parallel result
+    int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0));
+    if(sum != host_sum){
+        std::cerr << "ERROR: sum (" << sum << ") != (" << host_sum << ")" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_tbb_merge.cpp b/src/boost/libs/compute/perf/perf_tbb_merge.cpp
new file mode 100644
index 00000000..a7aa814e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_tbb_merge.cpp
@@ -0,0 +1,95 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Kyle Lutz
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+//
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include + +#include "perf.hpp" + +// example from: http://www.threadingbuildingblocks.org/docs/help/reference/algorithms/parallel_for_func.htm +using namespace tbb; + +template +struct ParallelMergeRange { + static size_t grainsize; + Iterator begin1, end1; // [begin1,end1) is 1st sequence to be merged + Iterator begin2, end2; // [begin2,end2) is 2nd sequence to be merged + Iterator out; // where to put merged sequence + bool empty() const {return (end1-begin1)+(end2-begin2)==0;} + bool is_divisible() const { + return (std::min)( end1-begin1, end2-begin2 ) > grainsize; + } + ParallelMergeRange( ParallelMergeRange& r, split ) { + if( r.end1-r.begin1 < r.end2-r.begin2 ) { + std::swap(r.begin1,r.begin2); + std::swap(r.end1,r.end2); + } + Iterator m1 = r.begin1 + (r.end1-r.begin1)/2; + Iterator m2 = std::lower_bound( r.begin2, r.end2, *m1 ); + begin1 = m1; + begin2 = m2; + end1 = r.end1; + end2 = r.end2; + out = r.out + (m1-r.begin1) + (m2-r.begin2); + r.end1 = m1; + r.end2 = m2; + } + ParallelMergeRange( Iterator begin1_, Iterator end1_, + Iterator begin2_, Iterator end2_, + Iterator out_ ) : + begin1(begin1_), end1(end1_), + begin2(begin2_), end2(end2_), out(out_) + {} +}; + +template +size_t ParallelMergeRange::grainsize = 1000; + +template +struct ParallelMergeBody { + void operator()( ParallelMergeRange& r ) const { + std::merge( r.begin1, r.end1, r.begin2, r.end2, r.out ); + } +}; + +template +void ParallelMerge( Iterator begin1, Iterator end1, Iterator begin2, Iterator end2, Iterator out ) { + parallel_for( + ParallelMergeRange(begin1,end1,begin2,end2,out), + ParallelMergeBody(), + simple_partitioner() + ); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + std::vector 
v1 = generate_random_vector(PERF_N / 2); + std::vector v2 = generate_random_vector(PERF_N / 2); + std::vector v3(PERF_N); + + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + ParallelMerge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_tbb_sort.cpp b/src/boost/libs/compute/perf/perf_tbb_sort.cpp new file mode 100644 index 00000000..2f79b5b2 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_tbb_sort.cpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include + +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + std::vector v(PERF_N); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + v = generate_random_vector(PERF_N); + t.start(); + tbb::parallel_sort(v.begin(), v.end()); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_accumulate.cu b/src/boost/libs/compute/perf/perf_thrust_accumulate.cu new file mode 100644 index 00000000..76b72321 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_accumulate.cu @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec = h_vec; + + int sum = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + sum = thrust::reduce(d_vec.begin(), d_vec.end()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "sum: " << sum << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_count.cu b/src/boost/libs/compute/perf/perf_thrust_count.cu new file mode 100644 index 00000000..d69df901 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_count.cu @@ -0,0 +1,49 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + thrust::host_vector host_vector(PERF_N); + thrust::generate(host_vector.begin(), host_vector.end(), rand_int); + + thrust::device_vector v = host_vector; + + size_t count = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + count = thrust::count(v.begin(), v.end(), 4); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "count: " << count << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu b/src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu new file mode 100644 index 00000000..df1367a2 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Benoit +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec = h_vec; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + thrust::exclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // transfer data back to host + thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_find.cu b/src/boost/libs/compute/perf/perf_thrust_find.cu new file mode 100644 index 00000000..e1482604 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_find.cu @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +// Max integer that can be generated by rand_int() function. 
+int rand_int_max = 25; + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * rand_int_max); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + // create vector of random numbers on the host + thrust::host_vector host_vector(PERF_N); + thrust::generate(host_vector.begin(), host_vector.end(), rand_int); + + thrust::device_vector v = host_vector; + + // trying to find element that isn't in vector (worst-case scenario) + int wanted = rand_int_max + 1; + + // result + thrust::device_vector::iterator device_result_it; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + device_result_it = thrust::find(v.begin(), v.end(), wanted); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // verify + if(device_result_it != v.end()){ + std::cout << "ERROR: " + << "device_result_iterator != " + << "v.end()" + << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_inner_product.cu b/src/boost/libs/compute/perf/perf_thrust_inner_product.cu new file mode 100644 index 00000000..6d01fc53 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_inner_product.cu @@ -0,0 +1,49 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector host_x(PERF_N); + thrust::host_vector host_y(PERF_N); + std::generate(host_x.begin(), host_x.end(), rand); + std::generate(host_y.begin(), host_y.end(), rand); + + // transfer data to the device + thrust::device_vector device_x = host_x; + thrust::device_vector device_y = host_y; + + int product = 0; + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + product = thrust::inner_product( + device_x.begin(), device_x.end(), device_y.begin(), 0 + ); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "product: " << product << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_merge.cu b/src/boost/libs/compute/perf/perf_thrust_merge.cu new file mode 100644 index 00000000..f269c939 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_merge.cu @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector v1(std::floor(PERF_N / 2.0)); + thrust::host_vector v2(std::ceil(PERF_N / 2.0)); + std::generate(v1.begin(), v1.end(), rand); + std::generate(v2.begin(), v2.end(), rand); + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + // transfer data to the device + thrust::device_vector gpu_v1 = v1; + thrust::device_vector gpu_v2 = v2; + thrust::device_vector gpu_v3(PERF_N); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + thrust::merge( + gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin() + ); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + thrust::host_vector check_v3 = gpu_v3; + + thrust::host_vector v3(PERF_N); + std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()); + bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin()); + if(!ok){ + std::cerr << "ERROR: merged ranges different" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_partial_sum.cu b/src/boost/libs/compute/perf/perf_thrust_partial_sum.cu new file mode 100644 index 00000000..e30e80b5 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_partial_sum.cu @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec = h_vec; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + thrust::inclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // transfer data back to host + thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_partition.cu b/src/boost/libs/compute/perf/perf_thrust_partition.cu new file mode 100644 index 00000000..5c89014c --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_partition.cu @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +struct less_than_ten : public thrust::unary_function +{ + __device__ bool operator()(int x) const + { + return x < 10; + } +}; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec(PERF_N); + std::generate(h_vec.begin(), h_vec.end(), rand_int); + + thrust::device_vector d_vec(PERF_N); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + thrust::partition( + d_vec.begin(), d_vec.end(), less_than_ten() + ); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu b/src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu new file mode 100644 index 00000000..a445c137 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu @@ -0,0 +1,92 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +struct unique_key { + int current; + int avgValuesNoPerKey; + + unique_key() + { + current = 0; + avgValuesNoPerKey = 512; + } + + int operator()() + { + double p = double(1.0) / static_cast(avgValuesNoPerKey); + if((rand() / double(RAND_MAX)) <= p) + return ++current; + return current; + } +} UniqueKey; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + + // create vector of keys and random values + thrust::host_vector host_keys(PERF_N); + thrust::host_vector host_values(PERF_N); + std::generate(host_keys.begin(), host_keys.end(), UniqueKey); + std::generate(host_values.begin(), host_values.end(), rand_int); + + // transfer data to the device + thrust::device_vector device_keys = host_keys; + thrust::device_vector device_values = host_values; + + // create device vectors for the results + thrust::device_vector device_keys_results(PERF_N); + thrust::device_vector device_values_results(PERF_N); + + typedef typename thrust::device_vector::iterator iterType; + thrust::pair result; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + result = thrust::reduce_by_key(device_keys.begin(), + device_keys.end(), + device_values.begin(), + device_keys_results.begin(), + device_values_results.begin()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + size_t result_size = thrust::distance(device_keys_results.begin(), result.first); + if(result_size != static_cast(host_keys[PERF_N-1] + 1)){ + std::cout << "ERROR: " + << "wrong number of keys" + << std::endl; + return -1; + } + + return 0; +} diff --git 
a/src/boost/libs/compute/perf/perf_thrust_reverse.cu b/src/boost/libs/compute/perf/perf_thrust_reverse.cu new file mode 100644 index 00000000..1927ca6a --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_reverse.cu @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + thrust::reverse(d_vec.begin(), d_vec.end()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // transfer data back to host + thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu b/src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu new file mode 100644 index 00000000..af1a044d --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu @@ -0,0 +1,47 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more 
information. +//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec; + d_vec = h_vec; + + // device vector for reversed data + thrust::device_vector d_reversed_vec(PERF_N); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + thrust::reverse_copy(d_vec.begin(), d_vec.end(), d_reversed_vec.begin()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_rotate.cu b/src/boost/libs/compute/perf/perf_thrust_rotate.cu new file mode 100644 index 00000000..108bb99b --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_rotate.cu @@ -0,0 +1,51 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec; + + size_t rotate_distance = PERF_N / 2; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + // there is no thrust::rotate() so we implement it manually with copy() + thrust::device_vector tmp(d_vec.begin(), d_vec.begin() + rotate_distance); + thrust::copy(d_vec.begin() + rotate_distance, d_vec.end(), d_vec.begin()); + thrust::copy(tmp.begin(), tmp.end(), d_vec.begin() + rotate_distance); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // transfer data back to host + thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_saxpy.cu b/src/boost/libs/compute/perf/perf_thrust_saxpy.cu new file mode 100644 index 00000000..aa35a191 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_saxpy.cu @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +struct saxpy_functor : public thrust::binary_function +{ + const float a; + + saxpy_functor(float _a) : a(_a) {} + + __host__ __device__ + float operator()(const float& x, const float& y) const + { + return a * x + y; + } +}; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector host_x(PERF_N); + thrust::host_vector host_y(PERF_N); + std::generate(host_x.begin(), host_x.end(), rand); + std::generate(host_y.begin(), host_y.end(), rand); + + // transfer data to the device + thrust::device_vector device_x = host_x; + thrust::device_vector device_y = host_y; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + thrust::transform(device_x.begin(), device_x.end(), device_y.begin(), device_y.begin(), saxpy_functor(2.5f)); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // transfer data back to host + thrust::copy(device_x.begin(), device_x.end(), host_x.begin()); + thrust::copy(device_y.begin(), device_y.end(), host_y.begin()); + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_set_difference.cu b/src/boost/libs/compute/perf/perf_thrust_set_difference.cu new file mode 100644 index 00000000..3465f214 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_set_difference.cu @@ -0,0 +1,61 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector v1(std::floor(PERF_N / 2.0)); + thrust::host_vector v2(std::ceil(PERF_N / 2.0)); + std::generate(v1.begin(), v1.end(), rand_int); + std::generate(v2.begin(), v2.end(), rand_int); + std::sort(v1.begin(), v1.end()); + std::sort(v2.begin(), v2.end()); + + // transfer data to the device + thrust::device_vector gpu_v1 = v1; + thrust::device_vector gpu_v2 = v2; + thrust::device_vector gpu_v3(PERF_N); + + thrust::device_vector::iterator gpu_v3_end; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + t.start(); + gpu_v3_end = thrust::set_difference( + gpu_v1.begin(), gpu_v1.end(), + gpu_v2.begin(), gpu_v2.end(), + gpu_v3.begin() + ); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + std::cout << "size: " << thrust::distance(gpu_v3.begin(), gpu_v3_end) << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_sort.cu b/src/boost/libs/compute/perf/perf_thrust_sort.cu new file mode 100644 index 00000000..b2d90939 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_sort.cu @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec = generate_random_vector(PERF_N); + + // transfer data to the device + thrust::device_vector d_vec; + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + thrust::sort(d_vec.begin(), d_vec.end()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + // transfer data back to host + thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin()); + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_thrust_unique.cu b/src/boost/libs/compute/perf/perf_thrust_unique.cu new file mode 100644 index 00000000..6030f291 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_thrust_unique.cu @@ -0,0 +1,50 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "perf.hpp" + +int rand_int() +{ + return static_cast((rand() / double(RAND_MAX)) * 25.0); +} + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + + std::cout << "size: " << PERF_N << std::endl; + thrust::host_vector h_vec(PERF_N); + std::generate(h_vec.begin(), h_vec.end(), rand_int); + + thrust::device_vector d_vec(PERF_N); + + perf_timer t; + for(size_t trial = 0; trial < PERF_TRIALS; trial++){ + d_vec = h_vec; + + t.start(); + thrust::unique(d_vec.begin(), d_vec.end()); + cudaDeviceSynchronize(); + t.stop(); + } + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp b/src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp new file mode 100644 index 00000000..57973a81 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp @@ -0,0 +1,46 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#include +#include +#include + +#include +#include +#include +#include + +#include "perf.hpp" + +namespace compute = boost::compute; + +int main(int argc, char *argv[]) +{ + perf_parse_args(argc, argv); + std::cout << "size: " << PERF_N << std::endl; + + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + compute::vector vector(PERF_N, context); + + compute::default_random_engine rng(queue); + compute::uniform_int_distribution dist(0, 1); + + perf_timer t; + t.start(); + dist.generate(vector.begin(), vector.end(), rng, queue); + queue.finish(); + t.stop(); + std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/perf/perf_unique.cpp b/src/boost/libs/compute/perf/perf_unique.cpp new file mode 100644 index 00000000..b25801f1 --- /dev/null +++ b/src/boost/libs/compute/perf/perf_unique.cpp @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------//
+
+// NOTE(review): in this copy of the patch the #include targets and the
+// template arguments (after static_cast, std::vector, boost::compute::vector)
+// are missing; restore them from upstream before building.
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Scales rand() by 25.0 / RAND_MAX, which gives a value in [0, 25], and
+// returns it as an int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmark driver: times boost::compute::unique on a device vector of
+// PERF_N values generated by rand_int() and prints the size, the device name
+// and the measured time.
+//
+// perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are not defined in this
+// file; presumably they come from perf.hpp.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create vector on the device; the host data is copied into it at the
+    // start of every trial below
+    boost::compute::vector device_vector(PERF_N, context);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // reload the input before the timer starts, so the copy is not part
+        // of the measurement
+        boost::compute::copy(
+            host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+        );
+        t.start();
+        boost::compute::unique(
+            device_vector.begin(), device_vector.end(), queue
+        );
+        // wait for the queued work to complete before stopping the timer
+        queue.finish();
+        t.stop();
+    }
+    // min_time() is divided by 1e6 and labelled "ms" -- presumably it is
+    // reported in nanoseconds; confirm in perf.hpp
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_unique_copy.cpp b/src/boost/libs/compute/perf/perf_unique_copy.cpp
new file mode 100644
index 00000000..d7ff98af
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_unique_copy.cpp
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+// NOTE(review): in this copy of the patch the #include targets and the
+// template arguments (after static_cast, std::vector, boost::compute::vector)
+// are missing; restore them from upstream before building.
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "perf.hpp"
+
+// Scales rand() by 25.0 / RAND_MAX, which gives a value in [0, 25], and
+// returns it as an int.
+int rand_int()
+{
+    return static_cast((rand() / double(RAND_MAX)) * 25.0);
+}
+
+// Benchmark driver: times boost::compute::unique_copy from one PERF_N-element
+// device vector into a second one and prints the size, the device name and
+// the measured time.
+//
+// perf_parse_args, PERF_N, PERF_TRIALS and perf_timer are not defined in this
+// file; presumably they come from perf.hpp.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create the input vector and an equally sized second vector on the
+    // device; the host data is copied into the first one at the start of
+    // every trial below
+    boost::compute::vector device_vector(PERF_N, context);
+    boost::compute::vector device_vector2(PERF_N, context);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // reload the input before the timer starts, so the copy is not part
+        // of the measurement
+        boost::compute::copy(
+            host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+        );
+        t.start();
+        // device_vector2.begin() is passed as the output iterator
+        boost::compute::unique_copy(
+            device_vector.begin(), device_vector.end(), device_vector2.begin(), queue
+        );
+        // wait for the queued work to complete before stopping the timer
+        queue.finish();
+        t.stop();
+    }
+    // min_time() is divided by 1e6 and labelled "ms" -- presumably it is
+    // reported in nanoseconds; confirm in perf.hpp
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perfdoc.py b/src/boost/libs/compute/perf/perfdoc.py
new file mode 100755
index 00000000..e9c60362
--- /dev/null
+++ b/src/boost/libs/compute/perf/perfdoc.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+# Copyright (c) 2014 Kyle Lutz
+# Distributed under the Boost Software License, Version 1.0
+# See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt
+#
+# See http://boostorg.github.com/compute for more information.
+
+import os
+import sys
+import pylab
+
+from perf import run_benchmark
+
+# number of the most recently created pylab figure; bumped once per plot
+fignum = 0
+
+def plot_to_file(report, filename):
+    """Plot every run in *report* on one log-log chart and save it.
+
+    report   -- object whose ``samples`` attribute maps a run name to a
+                sequence of samples; sample[0] is plotted on the x axis
+                ("Size") and sample[1] on the y axis ("Time (ms)")
+    filename -- path handed to pylab.savefig()
+
+    Each call draws into a new pylab figure numbered by the module-level
+    ``fignum`` counter. Runs are drawn in sorted name order.
+    """
+    global fignum
+    fignum += 1
+    pylab.figure(fignum)
+
+    run_to_label = {
+        "stl": "C++ STL",
+        "thrust": "Thrust",
+        "compute": "Boost.Compute",
+        "bolt": "Bolt",
+    }
+
+    for run in sorted(report.samples.keys()):
+        # materialise once so one-shot iterables are only consumed a single
+        # time, as in a plain loop
+        samples = list(report.samples[run])
+        x = [sample[0] for sample in samples]
+        y = [sample[1] for sample in samples]
+
+        # fall back to the raw run name so a run missing from the table is
+        # still plotted instead of aborting with KeyError
+        label = run_to_label.get(run, run)
+        pylab.loglog(x, y, marker='o', label=label)
+
+    pylab.xlabel("Size")
+    pylab.ylabel("Time (ms)")
+    pylab.legend(loc='upper left')
+    pylab.savefig(filename)
+
+if __name__ == '__main__':
+    # input sizes: powers of two from 2**10 up to 2**25
+    sizes = [pow(2, x) for x in range(10, 26)]
+    algorithms = [
+        "accumulate",
+        "count",
+        "inner_product",
+        "merge",
+        "partial_sum",
+        "partition",
+        "reverse",
+        "rotate",
+        "saxpy",
+        "sort",
+        "unique",
+    ]
+
+    try:
+        os.mkdir("perf_plots")
+    except OSError:
+        # an already existing directory is fine; any other failure would
+        # otherwise only surface at savefig(), after the benchmarks have run
+        if not os.path.isdir("perf_plots"):
+            raise
+
+    for algorithm in algorithms:
+        print("running '%s'" % (algorithm))
+        report = run_benchmark(algorithm, sizes, ["stl", "thrust", "bolt"])
+        plot_to_file(report, "perf_plots/%s_time_plot.png" % algorithm)
+
-- 
cgit v1.2.3