summaryrefslogtreecommitdiffstats
path: root/src/boost/libs/compute/perf
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/boost/libs/compute/perf
parentInitial commit. (diff)
downloadceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/boost/libs/compute/perf')
-rw-r--r--src/boost/libs/compute/perf/CMakeLists.txt210
-rw-r--r--src/boost/libs/compute/perf/perf.hpp109
-rwxr-xr-xsrc/boost/libs/compute/perf/perf.py238
-rw-r--r--src/boost/libs/compute/perf/perf_accumulate.cpp140
-rw-r--r--src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp46
-rw-r--r--src/boost/libs/compute/perf/perf_binary_find.cpp71
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_accumulate.cpp51
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_count.cpp57
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp52
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_fill.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_inner_product.cpp56
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_max_element.cpp69
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_merge.cpp60
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp53
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp100
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_saxpy.cpp76
-rw-r--r--src/boost/libs/compute/perf/perf_bolt_sort.cpp50
-rw-r--r--src/boost/libs/compute/perf/perf_cart_to_polar.cpp158
-rw-r--r--src/boost/libs/compute/perf/perf_comparison_sort.cpp86
-rw-r--r--src/boost/libs/compute/perf/perf_copy_if.cpp122
-rw-r--r--src/boost/libs/compute/perf/perf_copy_to_device.cpp55
-rw-r--r--src/boost/libs/compute/perf/perf_count.cpp77
-rw-r--r--src/boost/libs/compute/perf/perf_discrete_distribution.cpp48
-rw-r--r--src/boost/libs/compute/perf/perf_erase_remove.cpp61
-rw-r--r--src/boost/libs/compute/perf/perf_exclusive_scan.cpp97
-rw-r--r--src/boost/libs/compute/perf/perf_fill.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_find.cpp88
-rw-r--r--src/boost/libs/compute/perf/perf_find_end.cpp65
-rw-r--r--src/boost/libs/compute/perf/perf_host_sort.cpp65
-rw-r--r--src/boost/libs/compute/perf/perf_includes.cpp68
-rw-r--r--src/boost/libs/compute/perf/perf_inner_product.cpp74
-rw-r--r--src/boost/libs/compute/perf/perf_is_permutation.cpp66
-rw-r--r--src/boost/libs/compute/perf/perf_is_sorted.cpp63
-rw-r--r--src/boost/libs/compute/perf/perf_max_element.cpp93
-rw-r--r--src/boost/libs/compute/perf/perf_merge.cpp69
-rw-r--r--src/boost/libs/compute/perf/perf_next_permutation.cpp65
-rw-r--r--src/boost/libs/compute/perf/perf_nth_element.cpp60
-rw-r--r--src/boost/libs/compute/perf/perf_partial_sum.cpp97
-rw-r--r--src/boost/libs/compute/perf/perf_partition.cpp66
-rw-r--r--src/boost/libs/compute/perf/perf_partition_point.cpp68
-rw-r--r--src/boost/libs/compute/perf/perf_prev_permutation.cpp65
-rw-r--r--src/boost/libs/compute/perf/perf_random_number_engine.cpp101
-rw-r--r--src/boost/libs/compute/perf/perf_reduce_by_key.cpp114
-rw-r--r--src/boost/libs/compute/perf/perf_reverse.cpp60
-rw-r--r--src/boost/libs/compute/perf/perf_reverse_copy.cpp65
-rw-r--r--src/boost/libs/compute/perf/perf_rotate.cpp60
-rw-r--r--src/boost/libs/compute/perf/perf_rotate_copy.cpp62
-rw-r--r--src/boost/libs/compute/perf/perf_saxpy.cpp162
-rw-r--r--src/boost/libs/compute/perf/perf_search.cpp65
-rw-r--r--src/boost/libs/compute/perf/perf_search_n.cpp61
-rw-r--r--src/boost/libs/compute/perf/perf_set_difference.cpp75
-rw-r--r--src/boost/libs/compute/perf/perf_set_intersection.cpp75
-rw-r--r--src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp75
-rw-r--r--src/boost/libs/compute/perf/perf_set_union.cpp75
-rw-r--r--src/boost/libs/compute/perf/perf_sort.cpp130
-rw-r--r--src/boost/libs/compute/perf/perf_sort_by_key.cpp79
-rw-r--r--src/boost/libs/compute/perf/perf_sort_float.cpp72
-rw-r--r--src/boost/libs/compute/perf/perf_stable_partition.cpp62
-rw-r--r--src/boost/libs/compute/perf/perf_stl_accumulate.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_stl_count.cpp45
-rw-r--r--src/boost/libs/compute/perf/perf_stl_find.cpp58
-rw-r--r--src/boost/libs/compute/perf/perf_stl_find_end.cpp44
-rw-r--r--src/boost/libs/compute/perf/perf_stl_includes.cpp48
-rw-r--r--src/boost/libs/compute/perf/perf_stl_inner_product.cpp46
-rw-r--r--src/boost/libs/compute/perf/perf_stl_is_permutation.cpp45
-rw-r--r--src/boost/libs/compute/perf/perf_stl_max_element.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_stl_merge.cpp38
-rw-r--r--src/boost/libs/compute/perf/perf_stl_next_permutation.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_stl_partial_sum.cpp51
-rw-r--r--src/boost/libs/compute/perf/perf_stl_partition.cpp46
-rw-r--r--src/boost/libs/compute/perf/perf_stl_partition_point.cpp48
-rw-r--r--src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_stl_reverse.cpp41
-rw-r--r--src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp45
-rw-r--r--src/boost/libs/compute/perf/perf_stl_rotate.cpp41
-rw-r--r--src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp43
-rw-r--r--src/boost/libs/compute/perf/perf_stl_saxpy.cpp52
-rw-r--r--src/boost/libs/compute/perf/perf_stl_search.cpp44
-rw-r--r--src/boost/libs/compute/perf/perf_stl_search_n.cpp41
-rw-r--r--src/boost/libs/compute/perf/perf_stl_set_difference.cpp54
-rw-r--r--src/boost/libs/compute/perf/perf_stl_set_intersection.cpp54
-rw-r--r--src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp54
-rw-r--r--src/boost/libs/compute/perf/perf_stl_set_union.cpp54
-rw-r--r--src/boost/libs/compute/perf/perf_stl_sort.cpp33
-rw-r--r--src/boost/libs/compute/perf/perf_stl_stable_partition.cpp47
-rw-r--r--src/boost/libs/compute/perf/perf_stl_unique.cpp41
-rw-r--r--src/boost/libs/compute/perf/perf_stl_unique_copy.cpp44
-rw-r--r--src/boost/libs/compute/perf/perf_tbb_accumulate.cpp75
-rw-r--r--src/boost/libs/compute/perf/perf_tbb_merge.cpp95
-rw-r--r--src/boost/libs/compute/perf/perf_tbb_sort.cpp35
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_accumulate.cu45
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_count.cu49
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu48
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_find.cu65
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_inner_product.cu49
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_merge.cu63
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_partial_sum.cu48
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_partition.cu60
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu92
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_reverse.cu48
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu47
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_rotate.cu51
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_saxpy.cu63
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_set_difference.cu61
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_sort.cu48
-rw-r--r--src/boost/libs/compute/perf/perf_thrust_unique.cu50
-rw-r--r--src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp46
-rw-r--r--src/boost/libs/compute/perf/perf_unique.cpp60
-rw-r--r--src/boost/libs/compute/perf/perf_unique_copy.cpp61
-rwxr-xr-xsrc/boost/libs/compute/perf/perfdoc.py70
110 files changed, 7351 insertions, 0 deletions
diff --git a/src/boost/libs/compute/perf/CMakeLists.txt b/src/boost/libs/compute/perf/CMakeLists.txt
new file mode 100644
index 00000000..b04429e9
--- /dev/null
+++ b/src/boost/libs/compute/perf/CMakeLists.txt
@@ -0,0 +1,210 @@
+# ---------------------------------------------------------------------------
+# Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+#
+# Distributed under the Boost Software License, Version 1.0
+# See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt
+#
+# ---------------------------------------------------------------------------
+
+include_directories(../include)
+
+# Boost components that every benchmark links against
+set(PERF_BOOST_COMPONENTS system timer chrono program_options)
+
+# The options below are tested by name (if(VAR)) instead of by expansion
+# (if(${VAR})): an option that is not defined then evaluates to false rather
+# than expanding to nothing and leaving a malformed if() expression.
+
+# the offline cache additionally needs Boost.Filesystem
+if(BOOST_COMPUTE_USE_OFFLINE_CACHE)
+  list(APPEND PERF_BOOST_COMPONENTS filesystem)
+endif()
+
+# Boost.Thread is added for thread-safe builds without C++11 and for
+# Bolt builds with C++11
+if(BOOST_COMPUTE_THREAD_SAFE AND NOT BOOST_COMPUTE_USE_CPP11)
+  list(APPEND PERF_BOOST_COMPONENTS thread)
+elseif(BOOST_COMPUTE_HAVE_BOLT AND BOOST_COMPUTE_USE_CPP11)
+  list(APPEND PERF_BOOST_COMPONENTS thread)
+endif()
+
+# Bolt builds with C++11 additionally get Boost.Date_Time
+if(BOOST_COMPUTE_HAVE_BOLT AND BOOST_COMPUTE_USE_CPP11)
+  list(APPEND PERF_BOOST_COMPONENTS date_time)
+endif()
+
+if(PERF_BOOST_COMPONENTS)
+  list(REMOVE_DUPLICATES PERF_BOOST_COMPONENTS)
+endif()
+find_package(Boost 1.54 REQUIRED COMPONENTS ${PERF_BOOST_COMPONENTS})
+include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
+
+# Boost.Compute benchmarks. Each entry <name> is built from perf_<name>.cpp
+# into an executable called perf_<name> (see the foreach() below).
+set(BENCHMARKS
+ accumulate
+ bernoulli_distribution
+ binary_find
+ cart_to_polar
+ comparison_sort
+ copy_if
+ copy_to_device
+ count
+ discrete_distribution
+ erase_remove
+ exclusive_scan
+ fill
+ find
+ find_end
+ includes
+ inner_product
+ is_permutation
+ is_sorted
+ max_element
+ merge
+ next_permutation
+ nth_element
+ partial_sum
+ partition
+ partition_point
+ prev_permutation
+ reverse
+ reverse_copy
+ rotate
+ rotate_copy
+ host_sort
+ random_number_engine
+ reduce_by_key
+ saxpy
+ search
+ search_n
+ set_difference
+ set_intersection
+ set_symmetric_difference
+ set_union
+ sort
+ sort_by_key
+ sort_float
+ stable_partition
+ uniform_int_distribution
+ unique
+ unique_copy
+)
+
+# one executable per benchmark, linked against OpenCL and the Boost
+# components found above
+foreach(BENCHMARK ${BENCHMARKS})
+ set(PERF_TARGET perf_${BENCHMARK})
+ add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
+ target_link_libraries(${PERF_TARGET} ${OpenCL_LIBRARIES} ${Boost_LIBRARIES})
+endforeach()
+
+# stl benchmarks (for comparison)
+# each entry <name> is built from perf_<name>.cpp into perf_<name>
+set(STL_BENCHMARKS
+  stl_accumulate
+  stl_count
+  stl_find
+  stl_find_end
+  stl_includes
+  stl_inner_product
+  stl_max_element
+  stl_merge
+  stl_next_permutation
+  stl_partial_sum
+  stl_partition
+  stl_prev_permutation
+  stl_reverse
+  stl_reverse_copy
+  stl_rotate
+  stl_rotate_copy
+  stl_saxpy
+  stl_search
+  stl_search_n
+  stl_set_difference
+  stl_set_intersection
+  stl_set_symmetric_difference
+  stl_set_union
+  stl_sort
+  stl_stable_partition
+  stl_unique
+  stl_unique_copy
+)
+
+# stl benchmarks which require c++11
+# (the option is tested by name so that an undefined option is simply false
+# instead of expanding to an empty, malformed if() expression)
+if(BOOST_COMPUTE_USE_CPP11)
+  list(APPEND
+    STL_BENCHMARKS
+    stl_is_permutation
+    stl_partition_point
+  )
+endif()
+
+# these link against Boost only; no OpenCL library is required
+foreach(BENCHMARK ${STL_BENCHMARKS})
+  set(PERF_TARGET perf_${BENCHMARK})
+  add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
+  target_link_libraries(${PERF_TARGET} ${Boost_LIBRARIES})
+endforeach()
+
+# cuda/thrust benchmarks (for comparison)
+# (the option is tested by name so that an undefined option is simply false
+# instead of expanding to an empty, malformed if() expression)
+if(BOOST_COMPUTE_HAVE_CUDA)
+  find_package(CUDA 5.0 REQUIRED)
+
+  # each entry <name> is built from perf_<name>.cu into perf_<name>
+  set(CUDA_BENCHMARKS
+    thrust_accumulate
+    thrust_count
+    thrust_exclusive_scan
+    thrust_find
+    thrust_inner_product
+    thrust_merge
+    thrust_partial_sum
+    thrust_partition
+    thrust_reduce_by_key
+    thrust_reverse
+    thrust_reverse_copy
+    thrust_rotate
+    thrust_saxpy
+    thrust_set_difference
+    thrust_sort
+    thrust_unique
+  )
+
+  foreach(BENCHMARK ${CUDA_BENCHMARKS})
+    set(PERF_TARGET perf_${BENCHMARK})
+    cuda_add_executable(${PERF_TARGET} perf_${BENCHMARK}.cu)
+    target_link_libraries(${PERF_TARGET} ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
+  endforeach()
+endif()
+
+# intel tbb benchmarks (for comparison)
+# (the option is tested by name so that an undefined option is simply false
+# instead of expanding to an empty, malformed if() expression)
+if(BOOST_COMPUTE_HAVE_TBB)
+  find_package(TBB REQUIRED)
+  include_directories(SYSTEM ${TBB_INCLUDE_DIRS})
+
+  # each entry <name> is built from perf_<name>.cpp into perf_<name>
+  set(TBB_BENCHMARKS
+    tbb_accumulate
+    tbb_merge
+    tbb_sort
+  )
+
+  foreach(BENCHMARK ${TBB_BENCHMARKS})
+    set(PERF_TARGET perf_${BENCHMARK})
+    add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
+    target_link_libraries(${PERF_TARGET} ${TBB_LIBRARIES} ${Boost_LIBRARIES})
+  endforeach()
+endif()
+
+# bolt c++ template lib benchmarks (for comparison)
+# (the options are tested by name so that an undefined option is simply false
+# instead of expanding to an empty, malformed if() expression)
+if(BOOST_COMPUTE_HAVE_BOLT AND BOOST_COMPUTE_USE_CPP11)
+  find_package(Bolt REQUIRED)
+  include_directories(SYSTEM ${BOLT_INCLUDE_DIRS})
+
+  # each entry <name> is built from perf_<name>.cpp into perf_<name>
+  set(BOLT_BENCHMARKS
+    bolt_accumulate
+    bolt_count
+    bolt_exclusive_scan
+    bolt_fill
+    bolt_inner_product
+    bolt_max_element
+    bolt_merge
+    bolt_partial_sum
+    bolt_reduce_by_key
+    bolt_saxpy
+    bolt_sort
+  )
+
+  foreach(BENCHMARK ${BOLT_BENCHMARKS})
+    set(PERF_TARGET perf_${BENCHMARK})
+    add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
+    target_link_libraries(${PERF_TARGET} ${OpenCL_LIBRARIES} ${BOLT_LIBRARIES} ${Boost_LIBRARIES})
+  endforeach()
+elseif(BOOST_COMPUTE_HAVE_BOLT AND NOT BOOST_COMPUTE_USE_CPP11)
+  # Bolt was requested but C++11 is off: skip the targets and tell the user why
+  message(WARNING "BOOST_COMPUTE_USE_CPP11 must be ON for building Bolt C++ Template Library performance tests.")
+endif()
diff --git a/src/boost/libs/compute/perf/perf.hpp b/src/boost/libs/compute/perf/perf.hpp
new file mode 100644
index 00000000..cce0328c
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf.hpp
@@ -0,0 +1,109 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef PERF_HPP
+#define PERF_HPP
+
+// this header contains general purpose functions and variables used by
+// the boost.compute performance benchmarks.
+
+#include <vector>
+#include <cstdlib>
+#include <algorithm>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/timer/timer.hpp>
+
+static size_t PERF_N = 1024;
+static size_t PERF_TRIALS = 3;
+
+// overrides the perf defaults from the optional positional arguments:
+// argv[1] replaces PERF_N and argv[2] replaces PERF_TRIALS. arguments that
+// are absent leave the corresponding variable untouched.
+inline void perf_parse_args(int argc, char *argv[])
+{
+    if(argc < 2){
+        return;
+    }
+    PERF_N = boost::lexical_cast<size_t>(argv[1]);
+
+    if(argc < 3){
+        return;
+    }
+    PERF_TRIALS = boost::lexical_cast<size_t>(argv[2]);
+}
+
+// builds a vector of 'size' elements, each assigned the result of one
+// call to rand()
+template<class T>
+std::vector<T> generate_random_vector(const size_t size)
+{
+    std::vector<T> values(size);
+    for(typename std::vector<T>::iterator it = values.begin(); it != values.end(); ++it){
+        *it = rand();
+    }
+    return values;
+}
+
+// a simple timer wrapper which records multiple time entries
+//
+// every start()/stop() pair appends one elapsed wall-clock reading (in
+// nanoseconds) to 'times'. the accessors return 0 when no entry has been
+// recorded yet (e.g. a benchmark run with zero trials) instead of reading
+// from an empty vector.
+class perf_timer
+{
+public:
+    typedef boost::timer::nanosecond_type nanosecond_type;
+
+    // boost's cpu_timer starts running when constructed; stop it right away
+    // so that nothing is measured before the first start()
+    perf_timer()
+    {
+        timer.stop();
+    }
+
+    // begins timing a new entry
+    void start()
+    {
+        timer.start();
+    }
+
+    // ends the current entry and records its wall-clock duration
+    void stop()
+    {
+        timer.stop();
+        times.push_back(timer.elapsed().wall);
+    }
+
+    // number of entries recorded so far
+    size_t trials() const
+    {
+        return times.size();
+    }
+
+    // discards all recorded entries
+    void clear()
+    {
+        times.clear();
+    }
+
+    // most recently recorded entry, or 0 if there is none
+    nanosecond_type last_time() const
+    {
+        return times.empty() ? nanosecond_type(0) : times.back();
+    }
+
+    // smallest recorded entry, or 0 if there is none
+    nanosecond_type min_time() const
+    {
+        if(times.empty()){
+            return nanosecond_type(0);
+        }
+        return *std::min_element(times.begin(), times.end());
+    }
+
+    // largest recorded entry, or 0 if there is none
+    nanosecond_type max_time() const
+    {
+        if(times.empty()){
+            return nanosecond_type(0);
+        }
+        return *std::max_element(times.begin(), times.end());
+    }
+
+    boost::timer::cpu_timer timer;
+    std::vector<boost::timer::nanosecond_type> times;
+};
+
+// converts a measurement into a throughput figure: 'count' items of type
+// 'T' handled in 'time' nanoseconds, expressed in MB/s (1 MB = 1024 * 1024
+// bytes)
+template<class T>
+double perf_rate(const size_t count, perf_timer::nanosecond_type time)
+{
+    const size_t total_bytes = count * sizeof(T);
+    const double megabytes = double(total_bytes) / 1024 / 1024;
+    const double seconds = time / 1e9;
+
+    return megabytes / seconds;
+}
+
+#endif // PERF_HPP
diff --git a/src/boost/libs/compute/perf/perf.py b/src/boost/libs/compute/perf/perf.py
new file mode 100755
index 00000000..c7b33f63
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf.py
@@ -0,0 +1,238 @@
+#!/usr/bin/python
+
+# Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+# Distributed under the Boost Software License, Version 1.0
+# See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt
+#
+# See http://boostorg.github.com/compute for more information.
+
+# driver script for boost.compute benchmarking. will run a
+# benchmark for a given function (e.g. accumulate, sort).
+
+import os
+import sys
+import subprocess
+
+# pylab is only needed for the --plot-time / --plot-rate modes; the textual
+# report works without it, so a missing module is reported but not fatal.
+# Only ImportError is handled so that Ctrl-C or SystemExit during the import
+# is not silently swallowed.
+try:
+    import pylab
+except ImportError:
+    print('pylab not found, no ploting...')
+
+def run_perf_process(name, size, backend = ""):
+    """Run one benchmark executable and return the time it reports.
+
+    The executable is ``./perf/perf_<name>`` or, when ``backend`` is
+    non-empty, ``./perf/perf_<backend>_<name>``. It is invoked with
+    ``size`` (converted to an integer) as its only argument.
+
+    Returns the number found on the last output line starting with
+    ``time:`` (the benchmarks print this value in milliseconds), or 0 when
+    the executable is missing, cannot be started, exits with a non-zero
+    status, or prints no ``time:`` line.
+    """
+    if not backend:
+        proc = "perf_%s" % name
+    else:
+        proc = "perf_%s_%s" % (backend, name)
+
+    filename = "./perf/" + proc
+
+    if not os.path.isfile(filename):
+        print("Error: failed to find ", filename, " for running")
+        return 0
+    try:
+        output = subprocess.check_output([filename, str(int(size))])
+    except (subprocess.CalledProcessError, OSError):
+        # the benchmark failed or could not be executed; report "no result".
+        # KeyboardInterrupt is deliberately not caught so that Ctrl-C stops
+        # the whole run instead of just skipping one size.
+        return 0
+
+    t = 0
+    for line in output.decode('utf8').split("\n"):
+        if line.startswith("time:"):
+            # line format: "time: <value> ms"
+            t = float(line.split(":")[1].split()[0])
+
+    return t
+
+class Report:
+    """Collects (size, time) samples for one benchmark, grouped by label.
+
+    ``name`` is the benchmark name used in headings and plot titles;
+    ``samples`` maps a label (e.g. "compute" or a competitor name) to the
+    list of ``(size, time)`` tuples recorded for it, in insertion order.
+    """
+
+    def __init__(self, name):
+        self.name = name
+        self.samples = {}
+
+    def add_sample(self, name, size, time):
+        """Append one ``(size, time)`` measurement under the label ``name``."""
+        self.samples.setdefault(name, []).append((size, time))
+
+    def display(self):
+        """Print every recorded series as CSV-style ``size,time`` lines."""
+        for name in self.samples.keys():
+            print('=== %s with %s ===' % (self.name, name))
+            print('size,time (ms)')
+
+            for sample in self.samples[name]:
+                print('%d,%f' % sample)
+
+    def _plot(self, name, ylabel, value):
+        """Shared log-log plotting for plot_time() and plot_rate().
+
+        Samples whose time is 0 (the "no result" marker) are skipped;
+        nothing is drawn when ``name`` is unknown or has no valid sample.
+        ``value(size, time)`` computes the y coordinate of one sample.
+        """
+        if name not in self.samples:
+            return
+
+        valid = [sample for sample in self.samples[name] if sample[1] != 0]
+        if not valid:
+            return
+
+        x = [size for size, _ in valid]
+        y = [value(size, time) for size, time in valid]
+
+        pylab.loglog(x, y, marker='o', label=name)
+        pylab.xlabel("Size")
+        pylab.ylabel(ylabel)
+        pylab.title(self.name)
+
+    def plot_time(self, name):
+        """Plot time against size for the series ``name``."""
+        self._plot(name, "Time (ms)", lambda size, time: time)
+
+    def plot_rate(self, name):
+        """Plot values processed per second against size for ``name``."""
+        # times are recorded in milliseconds, hence the 1e-3 factor
+        self._plot(
+            name,
+            "Rate (values/s)",
+            lambda size, time: float(size) / (float(time) * 1e-3)
+        )
+
+def run_benchmark(name, sizes, vs=()):
+    """Run the benchmark ``name`` for every size and collect the results.
+
+    The Boost.Compute executable is always run and recorded under the label
+    "compute". Each backend listed in ``vs`` is run as well, but only if it
+    is a known competitor that provides a benchmark called ``name``.
+
+    Returns a Report holding one series per backend that was run.
+    """
+    report = Report(name)
+
+    for size in sizes:
+        time = run_perf_process(name, size)
+
+        report.add_sample("compute", size, time)
+
+    # benchmarks available per competitor; each entry corresponds to an
+    # executable named perf_<competitor>_<benchmark>
+    competitors = {
+        "thrust" : [
+            "accumulate",
+            "count",
+            "exclusive_scan",
+            "find",
+            "inner_product",
+            "merge",
+            "partial_sum",
+            "partition",
+            "reduce_by_key",
+            "reverse",
+            "reverse_copy",
+            "rotate",
+            "saxpy",
+            # perf_thrust_set_difference is built by CMakeLists.txt
+            "set_difference",
+            "sort",
+            "unique"
+        ],
+        "bolt" : [
+            "accumulate",
+            "count",
+            "exclusive_scan",
+            "fill",
+            "inner_product",
+            "max_element",
+            "merge",
+            "partial_sum",
+            "reduce_by_key",
+            "saxpy",
+            "sort"
+        ],
+        "tbb": [
+            "accumulate",
+            "merge",
+            "sort"
+        ],
+        # note: there is no perf_stl_nth_element benchmark, so "nth_element"
+        # is intentionally not listed here
+        "stl": [
+            "accumulate",
+            "count",
+            "find",
+            "find_end",
+            "includes",
+            "inner_product",
+            "is_permutation",
+            "max_element",
+            "merge",
+            "next_permutation",
+            "partial_sum",
+            "partition",
+            "partition_point",
+            "prev_permutation",
+            "reverse",
+            "reverse_copy",
+            "rotate",
+            "rotate_copy",
+            "saxpy",
+            "search",
+            "search_n",
+            "set_difference",
+            "set_intersection",
+            "set_symmetric_difference",
+            "set_union",
+            "sort",
+            "stable_partition",
+            "unique",
+            "unique_copy"
+        ]
+    }
+
+    for other in vs:
+        if other not in competitors:
+            continue
+        if name not in competitors[other]:
+            continue
+
+        for size in sizes:
+            time = run_perf_process(name, size, other)
+            report.add_sample(other, size, time)
+
+    return report
+
+if __name__ == '__main__':
+    # the benchmark name is the first command-line argument ("sort" if absent)
+    test = sys.argv[1] if len(sys.argv) >= 2 else "sort"
+    print('running %s perf test' % test)
+
+    # input sizes: the powers of two from 2^1 up to 2^25, ascending
+    sizes = sorted(2 ** exponent for exponent in range(1, 26))
+
+    competitors = ["bolt", "tbb", "thrust", "stl"]
+
+    report = run_benchmark(test, sizes, competitors)
+
+    # --plot-time takes precedence over --plot-rate when both are given
+    if "--plot-time" in sys.argv:
+        plot = "time"
+    elif "--plot-rate" in sys.argv:
+        plot = "rate"
+    else:
+        plot = None
+
+    if plot is None:
+        # no plot requested: print the textual report
+        report.display()
+    else:
+        plotter = report.plot_time if plot == "time" else report.plot_rate
+        for label in ["compute"] + competitors:
+            plotter(label)
+
+        pylab.legend(loc='upper left')
+        pylab.show()
diff --git a/src/boost/libs/compute/perf/perf_accumulate.cpp b/src/boost/libs/compute/perf/perf_accumulate.cpp
new file mode 100644
index 00000000..bd4276e6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_accumulate.cpp
@@ -0,0 +1,140 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/accumulate.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+namespace po = boost::program_options;
+namespace compute = boost::compute;
+
+// maps rand() onto an integer in [0, 25] by scaling rand() / RAND_MAX by 25
+// and truncating
+int rand_int()
+{
+    const double fraction = rand() / double(RAND_MAX);
+    return static_cast<int>(fraction * 25.0);
+}
+
+// runs compute::accumulate() over 'data' 'trials' times, waiting for the
+// queue to finish inside each timed region, and returns the shortest
+// recorded time in nanoseconds
+template<class T>
+double perf_accumulate(const compute::vector<T>& data,
+                       const size_t trials,
+                       compute::command_queue& queue)
+{
+    perf_timer stopwatch;
+
+    for(size_t run = 0; run != trials; ++run){
+        stopwatch.start();
+        compute::accumulate(data.begin(), data.end(), T(0), queue);
+        queue.finish();
+        stopwatch.stop();
+    }
+
+    return stopwatch.min_time();
+}
+
+// searches for the fastest ("tpb", "vpt") parameter pair for accumulating
+// 'data' and leaves that pair stored in the device's global parameter cache.
+//
+// every combination from the two candidate tables below is written to the
+// cache and timed with perf_accumulate(); combinations that raise
+// compute::opencl_error are skipped.
+//
+// NOTE(review): "tpb"/"vpt" presumably stand for threads-per-block and
+// values-per-thread of the GPU reduction kernel -- confirm against the
+// reduce implementation that reads the "__boost_reduce_on_gpu_" key.
+// NOTE(review): if every combination throws, best_tpb and best_vpt keep
+// their initial value 0 and (0, 0) is what gets stored at the end.
+template<class T>
+void tune_accumulate(const compute::vector<T>& data,
+ const size_t trials,
+ compute::command_queue& queue)
+{
+ // parameter cache associated with the device the queue runs on
+ boost::shared_ptr<compute::detail::parameter_cache>
+ params = compute::detail::parameter_cache::get_global_cache(queue.get_device());
+
+ // cache entries are keyed per element type
+ const std::string cache_key =
+ std::string("__boost_reduce_on_gpu_") + compute::type_name<T>();
+
+ // candidate values tried for each parameter
+ const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
+ const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+ // best result seen so far; the parenthesised (max)() avoids expansion of
+ // a function-like max macro
+ double min_time = (std::numeric_limits<double>::max)();
+ compute::uint_ best_tpb = 0;
+ compute::uint_ best_vpt = 0;
+
+ for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){
+ params->set(cache_key, "tpb", tpbs[i]);
+ for(size_t j = 0; j < sizeof(vpts) / sizeof(*vpts); j++){
+ params->set(cache_key, "vpt", vpts[j]);
+
+ try {
+ // time the algorithm with the parameters currently in the cache
+ const double t = perf_accumulate(data, trials, queue);
+ if(t < min_time){
+ best_tpb = tpbs[i];
+ best_vpt = vpts[j];
+ min_time = t;
+ }
+ }
+ catch(compute::opencl_error&){
+ // invalid parameters for this device, skip
+ }
+ }
+ }
+
+ // store optimal parameters (this overwrites the last combination tried)
+ params->set(cache_key, "tpb", best_tpb);
+ params->set(cache_key, "vpt", best_vpt);
+}
+
+// benchmark driver for compute::accumulate().
+//
+// options: --size N (also accepted as the first positional argument),
+// --trials N, --tune (run the parameter tuning first) and --help (print the
+// option summary and exit). prints the input size, the device name and the
+// best time in milliseconds.
+int main(int argc, char *argv[])
+{
+    // setup command line arguments
+    po::options_description options("options");
+    options.add_options()
+        ("help", "show usage instructions")
+        ("size", po::value<size_t>()->default_value(8192), "input size")
+        ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
+        ("tune", "run tuning procedure")
+    ;
+    po::positional_options_description positional_options;
+    positional_options.add("size", 1);
+
+    // parse command line
+    po::variables_map vm;
+    po::store(
+        po::command_line_parser(argc, argv)
+            .options(options).positional(positional_options).run(),
+        vm
+    );
+    po::notify(vm);
+
+    // show usage instructions and exit without running the benchmark
+    if(vm.count("help")){
+        std::cout << options << std::endl;
+        return 0;
+    }
+
+    const size_t size = vm["size"].as<size_t>();
+    const size_t trials = vm["trials"].as<size_t>();
+    std::cout << "size: " << size << std::endl;
+
+    // setup context and queue for the default device
+    compute::device device = compute::system::default_device();
+    compute::context context(device);
+    compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<int> host_data(size);
+    std::generate(host_data.begin(), host_data.end(), rand_int);
+
+    // create vector on the device and copy the data
+    compute::vector<int> device_data(
+        host_data.begin(), host_data.end(), queue
+    );
+
+    // run tuning procedure (if requested)
+    if(vm.count("tune")){
+        tune_accumulate(device_data, trials, queue);
+    }
+
+    // run benchmark; perf_accumulate() returns nanoseconds
+    double t = perf_accumulate(device_data, trials, queue);
+    std::cout << "time: " << t / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp b/src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp
new file mode 100644
index 00000000..bffbe58f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bernoulli_distribution.cpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/random/default_random_engine.hpp>
+#include <boost/compute/random/bernoulli_distribution.hpp>
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+// times one bernoulli_distribution::generate() call that fills a device
+// vector of PERF_N elements and prints the elapsed time in milliseconds
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // context and queue for the default device
+    compute::device dev = compute::system::default_device();
+    compute::context ctx(dev);
+    compute::command_queue queue(ctx, dev);
+
+    // output buffer on the device
+    compute::vector<bool> results(PERF_N, ctx);
+
+    // random engine and the distribution (constructed with 0.5) driven by it
+    compute::default_random_engine engine(queue);
+    compute::bernoulli_distribution<float> bernoulli(0.5);
+
+    // a single timed run, including the wait for the queue to drain
+    perf_timer stopwatch;
+    stopwatch.start();
+    bernoulli.generate(results.begin(), results.end(), engine, queue);
+    queue.finish();
+    stopwatch.stop();
+
+    std::cout << "time: " << stopwatch.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_binary_find.cpp b/src/boost/libs/compute/perf/perf_binary_find.cpp
new file mode 100644
index 00000000..ee7c4631
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_binary_find.cpp
@@ -0,0 +1,71 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/algorithm/detail/binary_find.hpp>
+#include <boost/compute/algorithm/partition.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// yields an integer in [0, 25]: rand() is normalised by RAND_MAX, scaled
+// by 25 and truncated
+int rand_int()
+{
+    const double normalised = rand() / double(RAND_MAX);
+    const double scaled = normalised * 25.0;
+    return static_cast<int>(scaled);
+}
+
+// benchmark for detail::binary_find(): partitions PERF_N random values
+// around 20 on the device, then times the search for the first element
+// satisfying ">= 20" and prints the best of PERF_TRIALS runs in milliseconds
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // context and queue for the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random input generated on the host
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // device-side copy of the input
+    bc::vector<int> device_values(PERF_N, ctx);
+    bc::copy(
+        host_values.begin(), host_values.end(), device_values.begin(), queue
+    );
+
+    // move every element below 20 in front of the rest
+    using boost::compute::_1;
+    bc::partition(
+        device_values.begin(), device_values.end(), _1 < 20, queue
+    );
+
+    // make sure the setup work is complete so that it is not attributed to
+    // binary_find below
+    queue.finish();
+
+    perf_timer stopwatch;
+    for(size_t run = 0; run != PERF_TRIALS; ++run){
+        stopwatch.start();
+        bc::detail::binary_find(
+            device_values.begin(), device_values.end(), _1 >= 20, queue
+        );
+        queue.finish();
+        stopwatch.stop();
+    }
+    std::cout << "time: " << stopwatch.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_accumulate.cpp b/src/boost/libs/compute/perf/perf_bolt_accumulate.cpp
new file mode 100644
index 00000000..5a6b9b9c
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_accumulate.cpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+#include <bolt/cl/reduce.h>
+
+#include "perf.hpp"
+
+// Bolt counterpart of the accumulate benchmark: reduces PERF_N random ints
+// on the device PERF_TRIALS times and prints the best time in milliseconds
+// followed by the computed sum
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // report which device Bolt's default control object uses
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device dev = ctrl.getDevice();
+    std::cout << "device: " << dev.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // random input on the host
+    std::vector<int> host_values = generate_random_vector<int>(PERF_N);
+
+    // device buffer filled with a copy of the host data
+    bolt::cl::device_vector<int> device_values(PERF_N);
+    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());
+
+    int sum = 0;
+    perf_timer stopwatch;
+    for(size_t run = 0; run != PERF_TRIALS; ++run){
+        stopwatch.start();
+        sum = bolt::cl::reduce(device_values.begin(), device_values.end());
+        stopwatch.stop();
+    }
+
+    std::cout << "time: " << stopwatch.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "sum: " << sum << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_count.cpp b/src/boost/libs/compute/perf/perf_bolt_count.cpp
new file mode 100644
index 00000000..a23c5ac3
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_count.cpp
@@ -0,0 +1,57 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/count.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+// produces an integer in [0, 25] from rand(): the ratio rand() / RAND_MAX
+// is multiplied by 25 and truncated towards zero
+int rand_int()
+{
+    const double ratio = rand() / double(RAND_MAX);
+    const int value = static_cast<int>(ratio * 25.0);
+    return value;
+}
+
+// Bolt counterpart of the count benchmark: counts the occurrences of the
+// value 4 among PERF_N random ints on the device, PERF_TRIALS times, and
+// prints the best time in milliseconds followed by the count
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // report which device Bolt's default control object uses
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device dev = ctrl.getDevice();
+    std::cout << "device: " << dev.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // random input on the host, values produced by rand_int()
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // device buffer filled with a copy of the host data
+    bolt::cl::device_vector<int> device_values(PERF_N);
+    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());
+
+    size_t count = 0;
+    perf_timer stopwatch;
+    for(size_t run = 0; run != PERF_TRIALS; ++run){
+        stopwatch.start();
+        count = bolt::cl::count(ctrl, device_values.begin(), device_values.end(), 4);
+        stopwatch.stop();
+    }
+
+    std::cout << "time: " << stopwatch.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "count: " << count << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp b/src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp
new file mode 100644
index 00000000..a60e4655
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_exclusive_scan.cpp
@@ -0,0 +1,52 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/scan.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device chosen by Bolt's default control
+    bolt::cl::control control = bolt::cl::control::getDefault();
+    ::cl::Device target = control.getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // random host input and an equally sized device buffer
+    std::vector<int> host_data = generate_random_vector<int>(PERF_N);
+    bolt::cl::device_vector<int> device_data(PERF_N);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        // the scan runs in place, so reload the input each trial (untimed)
+        bolt::cl::copy(host_data.begin(), host_data.end(), device_data.begin());
+
+        timer.start();
+        bolt::cl::exclusive_scan(
+            device_data.begin(), device_data.end(), device_data.begin()
+        );
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // read the scanned values back into the host vector
+    bolt::cl::copy(device_data.begin(), device_data.end(), host_data.begin());
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_fill.cpp b/src/boost/libs/compute/perf/perf_bolt_fill.cpp
new file mode 100644
index 00000000..50b6e85e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_fill.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/fill.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device chosen by Bolt's default control
+    bolt::cl::control control = bolt::cl::control::getDefault();
+    ::cl::Device target = control.getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // zero-initialised device buffer that every trial overwrites
+    bolt::cl::device_vector<int> buffer(PERF_N, 0);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        bolt::cl::fill(buffer.begin(), buffer.end(), static_cast<int>(run)); // value = trial index
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_inner_product.cpp b/src/boost/libs/compute/perf/perf_bolt_inner_product.cpp
new file mode 100644
index 00000000..4c9652e6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_inner_product.cpp
@@ -0,0 +1,56 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/inner_product.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device chosen by Bolt's default control
+    bolt::cl::control control = bolt::cl::control::getDefault();
+    ::cl::Device target = control.getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // two random operands on the host (the left one is generated first)
+    std::vector<int> lhs_host = generate_random_vector<int>(PERF_N);
+    std::vector<int> rhs_host = generate_random_vector<int>(PERF_N);
+
+    // matching device buffers
+    bolt::cl::device_vector<int> lhs_device(PERF_N);
+    bolt::cl::device_vector<int> rhs_device(PERF_N);
+
+    // upload both operands once, before timing starts
+    bolt::cl::copy(lhs_host.begin(), lhs_host.end(), lhs_device.begin());
+    bolt::cl::copy(rhs_host.begin(), rhs_host.end(), rhs_device.begin());
+
+    int dot = 0;
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        dot = bolt::cl::inner_product(
+            lhs_device.begin(), lhs_device.end(), rhs_device.begin(), 0
+        );
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "product: " << dot << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_max_element.cpp b/src/boost/libs/compute/perf/perf_bolt_max_element.cpp
new file mode 100644
index 00000000..238b1ba2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_max_element.cpp
@@ -0,0 +1,69 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+#include <bolt/cl/max_element.h>
+
+#include "perf.hpp"
+
+int rand_int()
+{
+    return rand() % 10000000; // pseudo-random value in [0, 10000000)
+}
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device chosen by Bolt's default control
+    bolt::cl::control control = bolt::cl::control::getDefault();
+    ::cl::Device target = control.getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // random input on the host and its copy on the device
+    std::vector<int> host_data = generate_random_vector<int>(PERF_N);
+    bolt::cl::device_vector<int> device_data(PERF_N);
+    bolt::cl::copy(host_data.begin(), host_data.end(), device_data.begin());
+
+    typedef bolt::cl::device_vector<int>::iterator device_iterator;
+
+    // only the max_element() call itself is timed
+    device_iterator largest = device_data.begin();
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        largest = bolt::cl::max_element(device_data.begin(), device_data.end());
+        timer.stop();
+    }
+
+    // read the winning element through the device iterator
+    const int device_result = *largest;
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "max: " << device_result << std::endl;
+
+    // cross-check against std::max_element on the host copy
+    const int host_result = *std::max_element(host_data.begin(), host_data.end());
+    if(device_result == host_result){
+        return 0;
+    }
+
+    std::cout << "ERROR: "
+              << "device_max (" << device_result << ") "
+              << "!= "
+              << "host_max (" << host_result << ")"
+              << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_merge.cpp b/src/boost/libs/compute/perf/perf_bolt_merge.cpp
new file mode 100644
index 00000000..c5ee12ac
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_merge.cpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/merge.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // create two vectors of random numbers on the host (PERF_N elements in total)
+    std::vector<int> host_vec1 = generate_random_vector<int>(std::floor(PERF_N / 2.0));
+    std::vector<int> host_vec2 = generate_random_vector<int>(std::ceil(PERF_N / 2.0));
+    // sort them: merge() expects sorted input ranges
+    std::sort(host_vec1.begin(), host_vec1.end());
+    std::sort(host_vec2.begin(), host_vec2.end());
+
+    // size each device input like its host vector; PERF_N-sized inputs would add
+    // padding elements and make merge() write 2 * PERF_N values into device_vec3
+    bolt::cl::device_vector<int> device_vec1(host_vec1.size());
+    bolt::cl::device_vector<int> device_vec2(host_vec2.size());
+    bolt::cl::device_vector<int> device_vec3(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_vec1.begin(), host_vec1.end(), device_vec1.begin());
+    bolt::cl::copy(host_vec2.begin(), host_vec2.end(), device_vec2.begin());
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        bolt::cl::merge(
+            device_vec1.begin(), device_vec1.end(),
+            device_vec2.begin(), device_vec2.end(),
+            device_vec3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp b/src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp
new file mode 100644
index 00000000..2f9c830e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_partial_sum.cpp
@@ -0,0 +1,53 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/scan.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device chosen by Bolt's default control
+    bolt::cl::control control = bolt::cl::control::getDefault();
+    ::cl::Device target = control.getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // random host input and an equally sized device buffer
+    std::vector<int> host_data = generate_random_vector<int>(PERF_N);
+    bolt::cl::device_vector<int> device_data(PERF_N);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        // in-place scan: reload the input each trial, outside the timer
+        bolt::cl::copy(host_data.begin(), host_data.end(), device_data.begin());
+
+        timer.start();
+        bolt::cl::inclusive_scan(
+            device_data.begin(), device_data.end(), device_data.begin()
+        );
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // read the prefix sums back into the host vector
+    bolt::cl::copy(device_data.begin(), device_data.end(), host_data.begin());
+
+    return 0;
+}
+
diff --git a/src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp b/src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp
new file mode 100644
index 00000000..e7668498
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_reduce_by_key.cpp
@@ -0,0 +1,100 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+#include <bolt/cl/reduce_by_key.h>
+
+#include "perf.hpp"
+
+int rand_int()
+{
+    return int(25.0 * (rand() / static_cast<double>(RAND_MAX))); // whole number in [0, 25]
+}
+
+// generator functor yielding a non-decreasing key sequence for std::generate
+struct unique_key {
+    int current;           // key returned by the most recent call
+    int avgValuesNoPerKey; // reciprocal of the per-call advance probability
+
+    unique_key() : current(0), avgValuesNoPerKey(512)
+    {
+    }
+
+    // moves on to the next key with probability 1 / avgValuesNoPerKey
+    int operator()()
+    {
+        const double advance_probability = 1.0 / avgValuesNoPerKey;
+        const double draw = rand() / double(RAND_MAX);
+        if(draw <= advance_probability){ current += 1; }
+        return current;
+    }
+} UniqueKey;
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // create vector of non-decreasing keys (generated first) and random values
+    std::vector<int> host_keys(PERF_N);
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create device vectors for data
+    bolt::cl::device_vector<int> device_keys(PERF_N);
+    bolt::cl::device_vector<int> device_values(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_keys.begin(), host_keys.end(), device_keys.begin());
+    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());
+
+    // create device vectors for the results
+    bolt::cl::device_vector<int> device_keys_results(PERF_N);
+    bolt::cl::device_vector<int> device_values_results(PERF_N);
+
+    typedef bolt::cl::device_vector<int>::iterator iterType;
+    bolt::cl::pair<iterType, iterType> result = {
+        device_keys_results.begin(),
+        device_values_results.begin()
+    };
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = bolt::cl::reduce_by_key(device_keys.begin(),
+                                         device_keys.end(),
+                                         device_values.begin(),
+                                         device_keys_results.begin(),
+                                         device_values_results.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // keys never decrease and grow by at most one per element, so the number of
+    // distinct keys is last - first + 1 (the first key can be 0 or 1)
+    const size_t expected_size = host_keys.empty() ? 0 : host_keys.back() - host_keys.front() + 1;
+    size_t result_size = bolt::cl::distance(device_keys_results.begin(), result.first);
+    if(result_size != expected_size){
+        std::cout << "ERROR: " << "wrong number of keys" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_saxpy.cpp b/src/boost/libs/compute/perf/perf_bolt_saxpy.cpp
new file mode 100644
index 00000000..201f683a
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_saxpy.cpp
@@ -0,0 +1,76 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+#include <bolt/cl/transform.h>
+
+#include "perf.hpp"
+
+BOLT_FUNCTOR(saxpy_functor, // binary functor returning a * x + y; main() passes it to bolt::cl::transform()
+ struct saxpy_functor
+ {
+ float _a; // scale factor applied to the first operand
+ saxpy_functor(float a) : _a(a) {};
+
+ float operator() (const float &x, const float &y) const
+ {
+ return _a * x + y;
+ };
+ };
+)
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device chosen by Bolt's default control
+    bolt::cl::control control = bolt::cl::control::getDefault();
+    ::cl::Device target = control.getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // host operands, filled with rand() output converted to float
+    std::vector<float> x_host(PERF_N);
+    std::vector<float> y_host(PERF_N);
+    std::generate(x_host.begin(), x_host.end(), rand);
+    std::generate(y_host.begin(), y_host.end(), rand);
+
+    // device operands, uploaded once before the timed loop
+    bolt::cl::device_vector<float> x_device(PERF_N);
+    bolt::cl::device_vector<float> y_device(PERF_N);
+    bolt::cl::copy(x_host.begin(), x_host.end(), x_device.begin());
+    bolt::cl::copy(y_host.begin(), y_host.end(), y_device.begin());
+
+    // y = 2.5 * x + y is written back into y, so y keeps accumulating
+    // from one trial to the next
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        bolt::cl::transform(
+            x_device.begin(), x_device.end(), // first input
+            y_device.begin(),                 // second input
+            y_device.begin(),                 // output (same buffer)
+            saxpy_functor(2.5f)
+        );
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // download both vectors again
+    bolt::cl::copy(x_device.begin(), x_device.end(), x_host.begin());
+    bolt::cl::copy(y_device.begin(), y_device.end(), y_host.begin());
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_bolt_sort.cpp b/src/boost/libs/compute/perf/perf_bolt_sort.cpp
new file mode 100644
index 00000000..2508ec06
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_bolt_sort.cpp
@@ -0,0 +1,50 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <vector>
+
+#include <bolt/cl/sort.h>
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // print the name of the device used by Bolt's default control
+    ::cl::Device target = bolt::cl::control::getDefault().getDevice();
+    std::cout << "device: " << target.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // random host input and an equally sized device buffer
+    std::vector<int> host_data = generate_random_vector<int>(PERF_N);
+    bolt::cl::device_vector<int> device_data(PERF_N);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        // sorting happens in place, so reload the unsorted input before
+        // each trial; the upload stays outside the timed region
+        bolt::cl::copy(host_data.begin(), host_data.end(), device_data.begin());
+
+        timer.start();
+        bolt::cl::sort(device_data.begin(), device_data.end());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // fetch the sorted result; this overwrites the host vector
+    bolt::cl::copy(device_data.begin(), device_data.end(), host_data.begin());
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_cart_to_polar.cpp b/src/boost/libs/compute/perf/perf_cart_to_polar.cpp
new file mode 100644
index 00000000..e0d333eb
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_cart_to_polar.cpp
@@ -0,0 +1,158 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#define _USE_MATH_DEFINES
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/copy_n.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+using compute::float2_;
+
+float rand_float()
+{
+    return 1000.f * (static_cast<float>(rand()) / static_cast<float>(RAND_MAX)); // in [0, 1000]
+}
+
+void serial_cartesian_to_polar(const float *input, size_t n, float *output)
+{
+    // input and output both hold n interleaved pairs and may alias: each
+    // (x, y) pair is read completely before its output pair is written
+    const float *src = input;
+    float *dst = output;
+    for(size_t remaining = n; remaining > 0; --remaining, src += 2, dst += 2){
+        const float x = src[0];
+        const float y = src[1];
+        dst[0] = std::sqrt(x*x + y*y);            // magnitude
+        dst[1] = std::atan2(y, x) * 180.f / M_PI; // angle, scaled to degrees
+    }
+}
+
+// Converts n interleaved (magnitude, angle) pairs to (x, y); the angle is
+// passed straight to std::cos/std::sin and is therefore taken in radians.
+void serial_polar_to_cartesian(const float *input, size_t n, float *output)
+{
+    for(size_t i = 0; i < n; i++){
+        const float magnitude = input[i*2+0];
+        const float angle = input[i*2+1];
+
+        // std:: overloads keep the math in float, like serial_cartesian_to_polar()
+        output[i*2+0] = magnitude * std::cos(angle);
+        output[i*2+1] = magnitude * std::sin(angle);
+    }
+}
+
+// compute function mapping a cartesian point (x, y) to polar form (magnitude, angle scaled by 180/pi); applied by compute::transform in main()
+BOOST_COMPUTE_FUNCTION(float2_, cartesian_to_polar, (float2_ p),
+{
+ float x = p.x;
+ float y = p.y;
+
+ float magnitude = sqrt(x*x + y*y);
+ float angle = atan2(y, x) * 180.f / M_PI; // atan2 result scaled by 180/pi, i.e. degrees
+
+ return (float2)(magnitude, angle);
+});
+
+// converts from polar coordinates (magnitude, angle) to cartesian coordinates (x, y)
+// NOTE(review): cos/sin use the angle as-is, whereas cartesian_to_polar scales its angle by 180/pi -- confirm the intended unit
+BOOST_COMPUTE_FUNCTION(float2_, polar_to_cartesian, (float2_ p),
+{
+    float magnitude = p.x;
+    float angle = p.y;
+
+    float x = magnitude * cos(angle);
+    float y = magnitude * sin(angle);
+    return (float2)(x, y); // the return statement needs its terminating ';'
+});
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    compute::device device = compute::system::default_device();
+    compute::context context(device);
+    compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host (PERF_N interleaved x, y pairs)
+    std::vector<float> host_vector(PERF_N*2);
+    std::generate(host_vector.begin(), host_vector.end(), rand_float);
+
+    // create vector on the device and copy the data
+    compute::vector<float2_> device_vector(PERF_N, context);
+    compute::copy_n(
+        reinterpret_cast<float2_ *>(&host_vector[0]),
+        PERF_N,
+        device_vector.begin(),
+        queue
+    );
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        compute::transform(
+            device_vector.begin(),
+            device_vector.end(),
+            device_vector.begin(),
+            cartesian_to_polar,
+            queue
+        );
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // perform the same in-place conversion on the host, once per trial like on the device
+    t.clear();
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        serial_cartesian_to_polar(&host_vector[0], PERF_N, &host_vector[0]);
+        t.stop();
+    }
+    std::cout << "host time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // copy the device result back to the host
+    std::vector<float> device_data(PERF_N*2);
+    compute::copy(
+        device_vector.begin(),
+        device_vector.end(),
+        reinterpret_cast<float2_ *>(&device_data[0]),
+        queue
+    );
+
+    // verify all PERF_N*2 floats (every magnitude and angle), not only the first half
+    for(size_t i = 0; i < host_vector.size(); i++){
+        float host_value = host_vector[i];
+        float device_value = device_data[i];
+        if(std::abs(device_value - host_value) > 1e-3){
+            std::cout << "ERROR: "
+                      << "value at " << i << " "
+                      << "device_value (" << device_value << ") "
+                      << "!= "
+                      << "host_value (" << host_value << ")"
+                      << std::endl;
+            return -1;
+        }
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_comparison_sort.cpp b/src/boost/libs/compute/perf/perf_comparison_sort.cpp
new file mode 100644
index 00000000..fa63303f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_comparison_sort.cpp
@@ -0,0 +1,86 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/algorithm/is_sorted.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+namespace po = boost::program_options;
+namespace compute = boost::compute;
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    using boost::compute::int_;
+
+    // create vector of random numbers on the host
+    std::vector<int_> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand);
+
+    // create vector on the device (it is filled at the start of every trial)
+    boost::compute::vector<int_> device_vector(PERF_N, context);
+
+    // less-than comparison function for int_
+    BOOST_COMPUTE_FUNCTION(bool, comp, (int_ a, int_ b),
+    {
+        return a < b;
+    });
+
+    // sort vector; each trial first reloads the unsorted input, outside the timed region
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        boost::compute::copy(
+            host_vector.begin(),
+            host_vector.end(),
+            device_vector.begin(),
+            queue
+        );
+        queue.finish();
+
+        t.start();
+        boost::compute::sort(
+            device_vector.begin(),
+            device_vector.end(),
+            comp,
+            queue
+        );
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // verify vector is sorted
+    if(!boost::compute::is_sorted(device_vector.begin(),
+                                  device_vector.end(),
+                                  comp,
+                                  queue)){
+        std::cout << "ERROR: is_sorted() returned false" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_copy_if.cpp b/src/boost/libs/compute/perf/perf_copy_if.cpp
new file mode 100644
index 00000000..b2c162f6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_copy_if.cpp
@@ -0,0 +1,122 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <boost/compute/core.hpp>
+#include <boost/compute/closure.hpp>
+#include <boost/compute/algorithm/copy_if.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/random/default_random_engine.hpp>
+#include <boost/compute/random/uniform_int_distribution.hpp>
+#include <boost/compute/random/uniform_real_distribution.hpp>
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+void test_copy_if_odd(compute::command_queue &queue)
+{
+    // source and destination buffers live in the queue's context
+    const compute::context &context = queue.get_context();
+    compute::vector<int> input(PERF_N, context);
+    compute::vector<int> output(PERF_N, context);
+
+    // fill the source with uniform integers from 1 to 10
+    compute::default_random_engine engine(queue);
+    compute::uniform_int_distribution<int> one_to_ten(1, 10);
+    one_to_ten.generate(input.begin(), input.end(), engine, queue);
+
+    BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x), // predicate: lowest bit is set
+    {
+        return x & 1;
+    });
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        compute::vector<int>::iterator copied_end = compute::copy_if(
+            input.begin(), input.end(), output.begin(), is_odd, queue
+        );
+        queue.finish();
+        timer.stop();
+        // five of the values 1..10 are odd; warn when the share is far from 50%
+        const float ratio = float(std::distance(output.begin(), copied_end)) / PERF_N;
+        if(PERF_N > 1000 && (ratio > 0.55f || ratio < 0.45f)){
+            std::cerr << "error: ratio is " << ratio << std::endl;
+            std::cerr << "error: ratio should be around 45-55%" << std::endl;
+        }
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+}
+
+void test_copy_if_in_sphere(compute::command_queue &queue)
+{
+    using compute::float4_;
+    typedef compute::vector<float4_> point_vector;
+
+    // source and destination point buffers live in the queue's context
+    const compute::context &context = queue.get_context();
+    point_vector input_points(PERF_N, context);
+    point_vector output_points(PERF_N, context);
+
+    // fill all PERF_N * 4 float components with uniform values between -radius and +radius
+    float radius = 5.0f;
+    compute::default_random_engine engine(queue);
+    compute::uniform_real_distribution<float> coordinate(-radius, +radius);
+    coordinate.generate(
+        compute::make_buffer_iterator<float>(input_points.get_buffer(), 0),
+        compute::make_buffer_iterator<float>(input_points.get_buffer(), PERF_N * 4),
+        engine,
+        queue
+    );
+
+    // predicate which returns true if the point lies within the sphere
+    BOOST_COMPUTE_CLOSURE(bool, is_in_sphere, (float4_ point), (radius),
+    {
+        // ignore fourth component
+        point.w = 0;
+
+        return length(point) < radius;
+    });
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        point_vector::iterator copied_end = compute::copy_if(
+            input_points.begin(), input_points.end(),
+            output_points.begin(),
+            is_in_sphere,
+            queue
+        );
+        queue.finish();
+        timer.stop();
+        // a sphere fills pi/6 (about 52%) of its bounding cube
+        const float ratio = float(std::distance(output_points.begin(), copied_end)) / PERF_N;
+        if(PERF_N > 1000 && (ratio > 0.6f || ratio < 0.5f)){
+            std::cerr << "error: ratio is " << ratio << std::endl;
+            std::cerr << "error: ratio should be around 50-60%" << std::endl;
+        }
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+}
+
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    // build a context and command queue on the system's default device
+    compute::device target = compute::system::default_device();
+    compute::context ctx(target);
+    compute::command_queue queue(ctx, target);
+    std::cout << "device: " << target.name() << std::endl;
+
+    // only the odd-number benchmark runs; test_copy_if_in_sphere() is not called
+    test_copy_if_odd(queue);
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_copy_to_device.cpp b/src/boost/libs/compute/perf/perf_copy_to_device.cpp
new file mode 100644
index 00000000..0cda3d0f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_copy_to_device.cpp
@@ -0,0 +1,55 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <cstdlib>
+#include <iostream>
+
+#include <boost/compute.hpp>
+
+int main(int argc, char *argv[])
+{
+    namespace compute = boost::compute;
+
+    // number of ints to copy: first command-line argument, else 1000
+    const size_t size = (argc >= 2) ? boost::lexical_cast<size_t>(argv[1]) : 1000;
+
+    compute::device target = compute::system::default_device();
+    compute::context ctx(target);
+
+    // create the queue with profiling enabled; the event duration read
+    // below presumably depends on it
+    compute::command_queue::properties queue_flags =
+        compute::command_queue::enable_profiling;
+    compute::command_queue queue(ctx, target, queue_flags);
+
+    // host source filled with rand() values, device destination of equal size
+    std::vector<int> host_vector(size);
+    std::generate(host_vector.begin(), host_vector.end(), rand);
+    compute::vector<int> device_vector(host_vector.size(), ctx);
+
+    // start the asynchronous host-to-device copy, then block until it is done
+    compute::future<void> transfer = compute::copy_async(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+    transfer.wait();
+
+    // duration of the copy event in nanoseconds
+    const size_t elapsed =
+        transfer.get_event().duration<boost::chrono::nanoseconds>().count();
+    std::cout << "time: " << elapsed / 1e6 << " ms" << std::endl;
+
+    // bytes per nanosecond times 1000 equals 10^6 bytes per second
+    const float bytes_copied = float(size * sizeof(int));
+    const float rate = (bytes_copied / elapsed) * 1000.f;
+    std::cout << "rate: " << rate << " MB/s" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_count.cpp b/src/boost/libs/compute/perf/perf_count.cpp
new file mode 100644
index 00000000..a47eb615
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_count.cpp
@@ -0,0 +1,77 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/count.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 25.0);
+}
+
+// Benchmark driver: times boost::compute::count() over PERF_N random ints
+// and cross-checks the result against std::count() on the host.
+// Returns 0 on success and -1 when the device and host counts differ.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random input generated on the host
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // upload the input into a device-side vector
+    bc::vector<int> d_input(PERF_N, ctx);
+    bc::copy(input.begin(), input.end(), d_input.begin(), queue);
+
+    // every trial is timed on its own; the result of the last one is kept
+    size_t device_count = 0;
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        device_count = bc::count(d_input.begin(), d_input.end(), 4, queue);
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "count: " << device_count << std::endl;
+
+    // the host counts the same value to provide the reference result
+    size_t reference = std::count(input.begin(), input.end(), 4);
+    if(device_count == reference){
+        return 0;
+    }
+
+    std::cout << "ERROR: "
+              << "device_count (" << device_count << ") "
+              << "!= "
+              << "host_count (" << reference << ")"
+              << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_discrete_distribution.cpp b/src/boost/libs/compute/perf/perf_discrete_distribution.cpp
new file mode 100644
index 00000000..f6679eb1
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_discrete_distribution.cpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/random/default_random_engine.hpp>
+#include <boost/compute/random/discrete_distribution.hpp>
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+// Benchmark driver: times one call of discrete_distribution::generate()
+// that fills PERF_N device-side values using two equally weighted outcomes.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    compute::device dev = compute::system::default_device();
+    compute::context ctx(dev);
+    compute::command_queue queue(ctx, dev);
+
+    // destination buffer for the generated values
+    compute::vector<compute::uint_> samples(PERF_N, ctx);
+
+    // two outcomes with identical weight
+    int weights[] = {1, 1};
+    int *weights_end = weights + 2;
+
+    compute::default_random_engine engine(queue);
+    compute::discrete_distribution<compute::uint_> distribution(
+        weights, weights_end
+    );
+
+    // a single generate() call is timed (no trial loop in this benchmark)
+    perf_timer timer;
+    timer.start();
+    distribution.generate(samples.begin(), samples.end(), engine, queue);
+    queue.finish();
+    timer.stop();
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_erase_remove.cpp b/src/boost/libs/compute/perf/perf_erase_remove.cpp
new file mode 100644
index 00000000..f567247f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_erase_remove.cpp
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/remove.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 10] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 10.0);
+}
+
+// Benchmark driver: times removing every element equal to 4 from a device
+// vector of PERF_N random ints using remove() followed by erase().
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random input generated on the host
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        // the device vector is rebuilt from the host data for every trial,
+        // before the timer is started
+        bc::vector<int> d_values(input.begin(), input.end(), queue);
+
+        timer.start();
+        // erase everything from the iterator returned by remove() to the end
+        bc::vector<int>::iterator tail_begin =
+            bc::remove(d_values.begin(), d_values.end(), 4, queue);
+        d_values.erase(tail_begin, d_values.end(), queue);
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_exclusive_scan.cpp b/src/boost/libs/compute/perf/perf_exclusive_scan.cpp
new file mode 100644
index 00000000..e6c65135
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_exclusive_scan.cpp
@@ -0,0 +1,97 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Benoit
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/exclusive_scan.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 25.0);
+}
+
+// Benchmarks boost::compute::exclusive_scan() on PERF_N random ints and
+// checks the last scanned value against a sum computed on the host.
+//
+// Returns 0 on success and -1 when the device and host results disagree.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create input and result vectors on the device and copy the data
+    boost::compute::vector<int> device_vector(PERF_N, context);
+    boost::compute::vector<int> device_res(PERF_N,context);
+    boost::compute::copy(
+        host_vector.begin(),
+        host_vector.end(),
+        device_vector.begin(),
+        queue
+    );
+
+    // scan vector
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // refresh the input before each trial; the copy is not timed
+        boost::compute::copy(
+            host_vector.begin(),
+            host_vector.end(),
+            device_vector.begin(),
+            queue
+        );
+
+        t.start();
+        boost::compute::exclusive_scan(
+            device_vector.begin(),
+            device_vector.end(),
+            device_res.begin(),
+            queue
+        );
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // verify scan is correct; an empty input has no last element to compare
+    if(!host_vector.empty()){
+        int device_sum = device_res.back();
+
+        // the last value of an exclusive scan is the sum of every input
+        // element except the last one, which is 0 for a single element
+        // (indexing size()-2 would run out of bounds in that case)
+        int host_sum = std::accumulate(
+            host_vector.begin(), host_vector.end() - 1, int(0)
+        );
+
+        if(device_sum != host_sum){
+            std::cout << "ERROR: "
+                      << "device_sum (" << device_sum << ") "
+                      << "!= "
+                      << "host_sum (" << host_sum << ")"
+                      << std::endl;
+            return -1;
+        }
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_fill.cpp b/src/boost/libs/compute/perf/perf_fill.cpp
new file mode 100644
index 00000000..9f35b6b0
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_fill.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Benchmark driver: times boost::compute::fill() over a device vector of
+// PERF_N ints, writing a different value in every trial.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // device vector that starts out filled with zeros
+    bc::vector<int> values(PERF_N, 0, queue);
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        // the trial number doubles as the fill value
+        const int fill_value = static_cast<int>(trial);
+
+        timer.start();
+        bc::fill(values.begin(), values.end(), fill_value, queue);
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_find.cpp b/src/boost/libs/compute/perf/perf_find.cpp
new file mode 100644
index 00000000..3cfb75ad
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_find.cpp
@@ -0,0 +1,88 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/find.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Upper bound (inclusive) of the values produced by rand_int().
+int rand_int_max = 25;
+
+// Returns a pseudo-random integer in [0, rand_int_max] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * rand_int_max);
+}
+
+// Benchmark driver: times boost::compute::find() searching PERF_N random
+// ints for a value that is never generated, then compares the resulting
+// index with the one std::find() yields on the host.
+// Returns 0 on success and -1 when the two indices differ.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random input generated on the host
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // upload the input into a device-side vector
+    bc::vector<int> d_input(PERF_N, ctx);
+    bc::copy(input.begin(), input.end(), d_input.begin(), queue);
+
+    // one past the largest generated value, so the search never succeeds
+    // (worst-case scenario)
+    int wanted = rand_int_max + 1;
+
+    // receives the iterator returned by the device-side search
+    bc::vector<int>::iterator d_hit;
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        d_hit = bc::find(d_input.begin(), d_input.end(), wanted, queue);
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // compare the device index with the index found by std::find()
+    std::vector<int>::iterator host_hit =
+        std::find(input.begin(), input.end(), wanted);
+    size_t host_result_index = std::distance(input.begin(), host_hit);
+    size_t device_result_index = d_hit.get_index();
+
+    if(device_result_index == host_result_index){
+        return 0;
+    }
+
+    std::cout << "ERROR: "
+              << "device_result_index (" << device_result_index << ") "
+              << "!= "
+              << "host_result_index (" << host_result_index << ")"
+              << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_find_end.cpp b/src/boost/libs/compute/perf/perf_find_end.cpp
new file mode 100644
index 00000000..e20a7e8d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_find_end.cpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/find_end.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 25.0);
+}
+
+// Benchmark driver: times boost::compute::find_end() searching PERF_N
+// random ints for a fixed six-element pattern.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random text generated on the host
+    std::vector<int> text(PERF_N);
+    std::generate(text.begin(), text.end(), rand_int);
+
+    // the sub-sequence to look for
+    int pattern[] = {2, 6, 6, 7, 8, 4};
+
+    // upload the text, then the pattern, into device-side vectors
+    bc::vector<int> d_text(PERF_N, ctx);
+    bc::copy(text.begin(), text.end(), d_text.begin(), queue);
+
+    bc::vector<int> d_pattern(pattern, pattern + 6, queue);
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        // the returned iterator is not needed; only the run time matters
+        bc::find_end(
+            d_text.begin(), d_text.end(),
+            d_pattern.begin(), d_pattern.end(),
+            queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_host_sort.cpp b/src/boost/libs/compute/perf/perf_host_sort.cpp
new file mode 100644
index 00000000..d34b1c52
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_host_sort.cpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/timer/timer.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Benchmark driver: times boost::compute::sort() applied directly to a host
+// std::vector, times std::sort() on an identical copy, and checks that both
+// produce the same ordering.
+// Returns 0 on success and -1 when the two sorted vectors differ.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // unsorted reference data; each sort works on its own copy
+    std::vector<int> unsorted(PERF_N);
+    std::generate(unsorted.begin(), unsorted.end(), rand);
+
+    // copy handed to boost::compute::sort()
+    std::vector<int> compute_sorted = unsorted;
+
+    // the stopwatch starts running as soon as it is constructed
+    boost::timer::cpu_timer stopwatch;
+    bc::sort(compute_sorted.begin(), compute_sorted.end(), queue);
+    queue.finish();
+    std::cout << "time: " << stopwatch.elapsed().wall / 1e6 << " ms" << std::endl;
+
+    // copy handed to std::sort()
+    std::vector<int> std_sorted = unsorted;
+
+    // restart the stopwatch for the host-only sort
+    stopwatch.start();
+    std::sort(std_sorted.begin(), std_sorted.end());
+    std::cout << "host time: " << stopwatch.elapsed().wall / 1e6 << " ms" << std::endl;
+
+    // both sorts must agree element by element
+    const bool identical = std::equal(
+        compute_sorted.begin(), compute_sorted.end(), std_sorted.begin()
+    );
+    if(identical){
+        return 0;
+    }
+
+    std::cerr << "ERROR: sorted vectors not the same" << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_includes.cpp b/src/boost/libs/compute/perf/perf_includes.cpp
new file mode 100644
index 00000000..0418a5d7
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_includes.cpp
@@ -0,0 +1,68 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/includes.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 25.0);
+}
+
+// Benchmark driver: times boost::compute::includes() on two device vectors
+// that both hold the same sorted sequence of PERF_N random ints.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random values generated on the host and sorted in ascending order
+    std::vector<int> sorted_input(PERF_N);
+    std::generate(sorted_input.begin(), sorted_input.end(), rand_int);
+    std::sort(sorted_input.begin(), sorted_input.end());
+
+    // the same host data is uploaded into both device vectors
+    bc::vector<int> d_first(PERF_N, ctx);
+    bc::copy(sorted_input.begin(), sorted_input.end(), d_first.begin(), queue);
+
+    bc::vector<int> d_second(PERF_N, ctx);
+    bc::copy(sorted_input.begin(), sorted_input.end(), d_second.begin(), queue);
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        // the boolean result is discarded; only the run time matters
+        bc::includes(
+            d_first.begin(), d_first.end(),
+            d_second.begin(), d_second.end(),
+            queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_inner_product.cpp b/src/boost/libs/compute/perf/perf_inner_product.cpp
new file mode 100644
index 00000000..112a4bc6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_inner_product.cpp
@@ -0,0 +1,74 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/inner_product.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 25.0);
+}
+
+// Benchmark driver: times boost::compute::inner_product() over two device
+// vectors of PERF_N random ints and cross-checks the result against
+// std::inner_product() on the host.
+// Returns 0 on success and -1 when the two products differ.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // two independent random operands generated on the host
+    std::vector<int> lhs(PERF_N);
+    std::vector<int> rhs(PERF_N);
+    std::generate(lhs.begin(), lhs.end(), rand_int);
+    std::generate(rhs.begin(), rhs.end(), rand_int);
+
+    // upload both operands into device-side vectors
+    bc::vector<int> d_lhs(PERF_N, ctx);
+    bc::vector<int> d_rhs(PERF_N, ctx);
+    bc::copy(lhs.begin(), lhs.end(), d_lhs.begin(), queue);
+    bc::copy(rhs.begin(), rhs.end(), d_rhs.begin(), queue);
+
+    // every trial is timed on its own; the result of the last one is kept
+    int device_product = 0;
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        device_product = bc::inner_product(
+            d_lhs.begin(), d_lhs.end(), d_rhs.begin(), int(0), queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // the host computes the reference product from the same operands
+    int host_product =
+        std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), int(0));
+    if(device_product == host_product){
+        return 0;
+    }
+
+    std::cout << "ERROR: "
+              << "device_product (" << device_product << ") "
+              << "!= "
+              << "host_product (" << host_product << ")"
+              << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_is_permutation.cpp b/src/boost/libs/compute/perf/perf_is_permutation.cpp
new file mode 100644
index 00000000..39c72bf1
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_is_permutation.cpp
@@ -0,0 +1,66 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/is_permutation.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int()
+{
+    const double unit = rand() / static_cast<double>(RAND_MAX);
+    return static_cast<int>(unit * 25.0);
+}
+
+// Benchmark driver: times boost::compute::is_permutation() comparing a
+// device vector of PERF_N random ints with a second device vector that
+// holds the same values in reverse order.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random values generated on the host
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // first device vector: the values in their original order
+    bc::vector<int> d_forward(PERF_N, ctx);
+    bc::copy(input.begin(), input.end(), d_forward.begin(), queue);
+
+    // second device vector: the same values uploaded back to front
+    bc::vector<int> d_reversed(PERF_N, ctx);
+    bc::copy(input.rbegin(), input.rend(), d_reversed.begin(), queue);
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        // the boolean result is discarded; only the run time matters
+        bc::is_permutation(
+            d_forward.begin(), d_forward.end(),
+            d_reversed.begin(), d_reversed.end(),
+            queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_is_sorted.cpp b/src/boost/libs/compute/perf/perf_is_sorted.cpp
new file mode 100644
index 00000000..f16a3172
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_is_sorted.cpp
@@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/is_sorted.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Benchmarks boost::compute::is_sorted() on a device vector of PERF_N random
+// ints that has been sorted and then reversed, so the check is expected to
+// report "not sorted".
+//
+// Returns 0 on success and -1 when is_sorted() unexpectedly returned true.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand);
+
+    // a reversed sorted sequence is still sorted when it holds fewer than two
+    // distinct values; in that case "true" is the correct answer, not an error
+    const bool trivially_sorted =
+        host_vector.empty() ||
+        *std::min_element(host_vector.begin(), host_vector.end()) ==
+        *std::max_element(host_vector.begin(), host_vector.end());
+
+    // create vector on the device and copy the data
+    boost::compute::vector<int> device_vector(PERF_N, context);
+    boost::compute::copy(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+
+    // sort and then reverse the random vector
+    boost::compute::sort(device_vector.begin(), device_vector.end(), queue);
+    boost::compute::reverse(device_vector.begin(), device_vector.end(), queue);
+
+    // remember a failed check so it can be reflected in the exit status
+    bool failed = false;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        bool sorted = boost::compute::is_sorted(
+            device_vector.begin(), device_vector.end(), queue
+        );
+        queue.finish();
+        t.stop();
+        if(sorted && !trivially_sorted){
+            std::cerr << "ERROR: is_sorted() returned true" << std::endl;
+            failed = true;
+        }
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return failed ? -1 : 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_max_element.cpp b/src/boost/libs/compute/perf/perf_max_element.cpp
new file mode 100644
index 00000000..0e47c67e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_max_element.cpp
@@ -0,0 +1,93 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Rastko Anicic <anicic.rastko@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/max_element.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns rand() reduced modulo 10000000.
+int rand_int()
+{
+    const int modulus = 10000000;
+    return rand() % modulus;
+}
+
+// Benchmark driver: times boost::compute::max_element() over PERF_N random
+// ints and cross-checks both the maximum value and its position against
+// std::max_element() on the host.
+// Returns 0 on success and -1 when either the value or the index differs.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // bind a context and a command queue to the default device
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // random input generated on the host
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // upload the input into a device-side vector
+    bc::vector<int> d_input(PERF_N, ctx);
+    bc::copy(input.begin(), input.end(), d_input.begin(), queue);
+
+    // starts at begin() and is overwritten by every trial
+    bc::vector<int>::iterator d_max_pos = d_input.begin();
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        d_max_pos = bc::max_element(d_input.begin(), d_input.end(), queue);
+        queue.finish();
+        timer.stop();
+    }
+
+    // fetch the value the device iterator refers to
+    int device_max = d_max_pos.read(queue);
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "max: " << device_max << std::endl;
+
+    // reference maximum computed on the host
+    std::vector<int>::iterator host_max_pos =
+        std::max_element(input.begin(), input.end());
+    int host_max = *host_max_pos;
+
+    // first check: the maximum values must match
+    if(device_max != host_max){
+        std::cout << "ERROR: "
+                  << "device_max (" << device_max << ") "
+                  << "!= "
+                  << "host_max (" << host_max << ")"
+                  << std::endl;
+        return -1;
+    }
+
+    // second check: both maxima must sit at the same position
+    size_t host_max_idx = std::distance(input.begin(), host_max_pos);
+    size_t device_max_idx = std::distance(d_input.begin(), d_max_pos);
+    if(device_max_idx != host_max_idx){
+        std::cout << "ERROR: "
+                  << "device_max index (" << device_max_idx << ") "
+                  << "!= "
+                  << "host_max index (" << host_max_idx << ")"
+                  << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_merge.cpp b/src/boost/libs/compute/perf/perf_merge.cpp
new file mode 100644
index 00000000..58ea836f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_merge.cpp
@@ -0,0 +1,69 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <cmath>
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/merge.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Benchmark driver: times boost::compute::merge() combining two sorted
+// device vectors (PERF_N elements in total) and compares the merged output
+// with std::merge() run on the host.
+// Returns 0 on success and -1 when the merged ranges differ.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bc::device dev = bc::system::default_device();
+    bc::context ctx(dev);
+    bc::command_queue queue(ctx, dev);
+    std::cout << "device: " << dev.name() << std::endl;
+
+    // one half is rounded down and the other up, so the sizes add up to PERF_N
+    std::vector<int> left = generate_random_vector<int>(std::floor(PERF_N / 2.0));
+    std::vector<int> right = generate_random_vector<int>(std::ceil(PERF_N / 2.0));
+    std::vector<int> host_merged(PERF_N);
+
+    // both inputs are sorted before they are merged
+    std::sort(left.begin(), left.end());
+    std::sort(right.begin(), right.end());
+
+    // device copies of the inputs plus the output buffer
+    bc::vector<int> d_left(left.begin(), left.end(), queue);
+    bc::vector<int> d_right(right.begin(), right.end(), queue);
+    bc::vector<int> d_merged(PERF_N, ctx);
+
+    perf_timer timer;
+    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
+        timer.start();
+        bc::merge(
+            d_left.begin(), d_left.end(),
+            d_right.begin(), d_right.end(),
+            d_merged.begin(),
+            queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // download the device result for comparison
+    std::vector<int> device_merged(PERF_N);
+    bc::copy(d_merged.begin(), d_merged.end(), device_merged.begin(), queue);
+    queue.finish();
+
+    // the host merge of the same inputs is the reference
+    std::merge(
+        left.begin(), left.end(), right.begin(), right.end(), host_merged.begin()
+    );
+    if(std::equal(device_merged.begin(), device_merged.end(), host_merged.begin())){
+        return 0;
+    }
+
+    std::cerr << "ERROR: merged ranges different" << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_next_permutation.cpp b/src/boost/libs/compute/perf/perf_next_permutation.cpp
new file mode 100644
index 00000000..62f0bbb0
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_next_permutation.cpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/next_permutation.hpp>
+#include <boost/compute/algorithm/prev_permutation.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::next_permutation() on PERF_N ints.
+    // Only the next_permutation() call is timed; the prev_permutation()
+    // call issued after it is intended to undo the step between trials.
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host, sorted in descending order
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+    std::sort(host_vector.begin(), host_vector.end(), std::greater<int>());
+
+    // create vector on the device and copy the data
+    boost::compute::vector<int> device_vector(PERF_N, context);
+    boost::compute::copy(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        boost::compute::next_permutation(
+            device_vector.begin(), device_vector.end(), queue
+        );
+        queue.finish();
+        t.stop();
+
+        // restore step, deliberately outside the timed region
+        boost::compute::prev_permutation(
+            device_vector.begin(), device_vector.end(), queue
+        );
+        // wait for the restore step so that it cannot still be pending on
+        // the queue when the next trial starts its timer
+        queue.finish();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_nth_element.cpp b/src/boost/libs/compute/perf/perf_nth_element.cpp
new file mode 100644
index 00000000..07627cb4
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_nth_element.cpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/nth_element.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::nth_element() with the nth position
+    // at the middle of a PERF_N-element vector of ints.
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create vector on the device and copy the data
+    boost::compute::vector<int> device_vector(PERF_N, context);
+    boost::compute::copy(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // nth_element() rearranges the vector in place, so reload the
+        // original input (outside the timed region) to make every trial
+        // measure the same workload
+        boost::compute::copy(
+            host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+        );
+
+        t.start();
+        boost::compute::nth_element(
+            device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue
+        );
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_partial_sum.cpp b/src/boost/libs/compute/perf/perf_partial_sum.cpp
new file mode 100644
index 00000000..1c02b4c4
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_partial_sum.cpp
@@ -0,0 +1,97 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/partial_sum.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::partial_sum() on PERF_N ints. After
+    // timing, the last element of the device result is compared with the
+    // last element of std::partial_sum() computed on the host.
+    namespace compute = boost::compute;
+    using compute::int_;
+
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device with its own context and command queue
+    compute::device gpu = compute::system::default_device();
+    compute::context ctx(gpu);
+    compute::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // host-side input filled by rand_int()
+    std::vector<int_> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side input and output buffers
+    compute::vector<int_> gpu_input(PERF_N, ctx);
+    compute::vector<int_> gpu_output(PERF_N, ctx);
+    compute::copy(input.begin(), input.end(), gpu_input.begin(), queue);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; run++){
+        // upload the input again before each trial (not timed)
+        compute::copy(input.begin(), input.end(), gpu_input.begin(), queue);
+
+        timer.start();
+        compute::partial_sum(
+            gpu_input.begin(), gpu_input.end(), gpu_output.begin(), queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // reference result, computed in place on the host
+    std::partial_sum(input.begin(), input.end(), input.begin());
+
+    const int device_total = gpu_output.back();
+    const int host_total = input.back();
+    if(device_total == host_total){
+        return 0;
+    }
+
+    std::cout << "ERROR: device_sum (" << device_total
+              << ") != host_sum (" << host_total << ")"
+              << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_partition.cpp b/src/boost/libs/compute/perf/perf_partition.cpp
new file mode 100644
index 00000000..b765d736
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_partition.cpp
@@ -0,0 +1,66 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/partition.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::partition() with the predicate
+    // `_1 < 10` on PERF_N ints; the input is re-uploaded before each trial.
+    namespace compute = boost::compute;
+    using compute::_1;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device with its own context and command queue
+    compute::device gpu = compute::system::default_device();
+    compute::context ctx(gpu);
+    compute::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // host-side input filled by rand_int()
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side copy of the input
+    compute::vector<int> gpu_data(PERF_N, ctx);
+    compute::copy(input.begin(), input.end(), gpu_data.begin(), queue);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; run++){
+        // restore the unpartitioned input (not timed)
+        compute::copy(input.begin(), input.end(), gpu_data.begin(), queue);
+
+        timer.start();
+        compute::partition(gpu_data.begin(), gpu_data.end(), _1 < 10, queue);
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_partition_point.cpp b/src/boost/libs/compute/perf/perf_partition_point.cpp
new file mode 100644
index 00000000..91261aa1
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_partition_point.cpp
@@ -0,0 +1,68 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/algorithm/partition.hpp>
+#include <boost/compute/algorithm/partition_point.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::partition_point() with the predicate
+    // `_1 < 20`. The device vector is partitioned with the same predicate
+    // once, before any trial is timed.
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create vector on the device and copy the data
+    boost::compute::vector<int> device_vector(PERF_N, context);
+    boost::compute::copy(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+
+    // partition the input so that partition_point() has a valid range
+    using boost::compute::_1;
+    boost::compute::partition(
+        device_vector.begin(), device_vector.end(), _1 < 20, queue
+    );
+    // wait for the preparation step so that it cannot still be pending on
+    // the queue when the first trial starts its timer
+    queue.finish();
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        boost::compute::partition_point(
+            device_vector.begin(), device_vector.end(), _1 < 20, queue
+        );
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_prev_permutation.cpp b/src/boost/libs/compute/perf/perf_prev_permutation.cpp
new file mode 100644
index 00000000..bb7d76d2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_prev_permutation.cpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/next_permutation.hpp>
+#include <boost/compute/algorithm/prev_permutation.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::prev_permutation() on PERF_N ints.
+    // Only the prev_permutation() call is timed; the next_permutation()
+    // call issued after it is intended to undo the step between trials.
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host, sorted in ascending order
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+    std::sort(host_vector.begin(), host_vector.end());
+
+    // create vector on the device and copy the data
+    boost::compute::vector<int> device_vector(PERF_N, context);
+    boost::compute::copy(
+        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+    );
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        boost::compute::prev_permutation(
+            device_vector.begin(), device_vector.end(), queue
+        );
+        queue.finish();
+        t.stop();
+
+        // restore step, deliberately outside the timed region
+        boost::compute::next_permutation(
+            device_vector.begin(), device_vector.end(), queue
+        );
+        // wait for the restore step so that it cannot still be pending on
+        // the queue when the next trial starts its timer
+        queue.finish();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_random_number_engine.cpp b/src/boost/libs/compute/perf/perf_random_number_engine.cpp
new file mode 100644
index 00000000..db25d437
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_random_number_engine.cpp
@@ -0,0 +1,101 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/core.hpp>
+#include <boost/compute/random.hpp>
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+namespace po = boost::program_options;
+
+// Times Engine::generate() filling a device vector of `size` values of
+// type Engine::result_type, repeated `trials` times on `queue`, and
+// prints the vector size, the minimum trial time and the matching rate.
+template<class Engine>
+void perf_random_number_engine(const size_t size,
+                               const size_t trials,
+                               compute::command_queue& queue)
+{
+    typedef typename Engine::result_type value_type;
+
+    // engine under test, constructed on the benchmark queue
+    Engine rng(queue);
+
+    // destination buffer on the device
+    std::cout << "size = " << size << std::endl;
+    compute::vector<value_type> values(size, queue.get_context());
+
+    // timed generation runs
+    perf_timer timer;
+    size_t run = 0;
+    while(run < trials){
+        timer.start();
+        rng.generate(values.begin(), values.end(), queue);
+        queue.finish();
+        timer.stop();
+        run++;
+    }
+
+    // report the results
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "rate: " << perf_rate<value_type>(size, timer.min_time()) << " MB/s" << std::endl;
+}
+
+int main(int argc, char *argv[])
+{
+    // Command line driver: parses --size, --trials and --engine and runs
+    // perf_random_number_engine() for the selected engine on the default
+    // device. Returns 0 on success (or after --help) and -1 on bad input.
+
+    // setup and parse command line options
+    po::options_description options("options");
+    options.add_options()
+        ("help", "show usage instructions")
+        ("size", po::value<size_t>()->default_value(8192), "number of values")
+        ("trials", po::value<size_t>()->default_value(3), "number of trials")
+        ("engine", po::value<std::string>()->default_value("default_random_engine"), "random number engine")
+    ;
+    po::variables_map vm;
+    try {
+        po::store(po::parse_command_line(argc, argv, options), vm);
+        po::notify(vm);
+    }
+    catch(const po::error& e){
+        // unknown options or malformed values are reported together with
+        // the usage text instead of terminating on an uncaught exception
+        std::cerr << "error: " << e.what() << std::endl;
+        std::cerr << options << std::endl;
+        return -1;
+    }
+
+    if(vm.count("help")) {
+        std::cout << options << std::endl;
+        return 0;
+    }
+
+    // setup context and queue for the default device
+    compute::device device = compute::system::default_device();
+    compute::context context(device);
+    compute::command_queue queue(context, device);
+
+    // get command line options
+    const size_t size = vm["size"].as<size_t>();
+    const size_t trials = vm["trials"].as<size_t>();
+    const std::string& engine = vm["engine"].as<std::string>();
+
+    // run benchmark
+    if(engine == "default_random_engine"){
+        perf_random_number_engine<compute::default_random_engine>(size, trials, queue);
+    }
+    else if(engine == "mersenne_twister_engine"){
+        perf_random_number_engine<compute::mt19937>(size, trials, queue);
+    }
+    else if(engine == "linear_congruential_engine"){
+        perf_random_number_engine<compute::linear_congruential_engine<> >(size, trials, queue);
+    }
+    else if(engine == "threefry_engine"){
+        perf_random_number_engine<compute::threefry_engine<> >(size, trials, queue);
+    }
+    else {
+        std::cerr << "error: unknown random number engine '" << engine << "'" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_reduce_by_key.cpp b/src/boost/libs/compute/perf/perf_reduce_by_key.cpp
new file mode 100644
index 00000000..c88d450e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_reduce_by_key.cpp
@@ -0,0 +1,114 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/reduce_by_key.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+// Generator of non-decreasing integer keys. Each call either repeats the
+// current key or, with probability 1 / avgValuesNoPerKey, moves on to the
+// next one. UniqueKey is the instance used to fill the key vector.
+struct unique_key {
+    int current;            // key returned by the most recent call
+    int avgValuesNoPerKey;  // reciprocal of the chance to advance the key
+
+    unique_key()
+        : current(0),
+          avgValuesNoPerKey(512)
+    {
+    }
+
+    int operator()()
+    {
+        const double advance_chance = double(1.0) / static_cast<double>(avgValuesNoPerKey);
+        const double draw = rand() / double(RAND_MAX);
+        if(draw <= advance_chance){
+            ++current;
+        }
+        return current;
+    }
+} UniqueKey;
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::reduce_by_key() on PERF_N key/value
+    // pairs. Keys come from UniqueKey (non-decreasing, step of at most
+    // one), values from rand_int(). After timing, the number of reduced
+    // keys is checked against the number of distinct keys generated.
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of keys and random values
+    std::vector<int> host_keys(PERF_N);
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create vectors for keys and values on the device and copy the data
+    boost::compute::vector<int> device_keys(PERF_N, context);
+    boost::compute::vector<int> device_values(PERF_N,context);
+    boost::compute::copy(
+        host_keys.begin(),
+        host_keys.end(),
+        device_keys.begin(),
+        queue
+    );
+    boost::compute::copy(
+        host_values.begin(),
+        host_values.end(),
+        device_values.begin(),
+        queue
+    );
+
+    // vectors for the results
+    boost::compute::vector<int> device_keys_results(PERF_N, context);
+    boost::compute::vector<int> device_values_results(PERF_N,context);
+
+    typedef boost::compute::vector<int>::iterator iterType;
+    std::pair<iterType, iterType> result(
+        device_keys_results.begin(),
+        device_values_results.begin()
+    );
+
+    // reduce by key
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = boost::compute::reduce_by_key(device_keys.begin(),
+                                               device_keys.end(),
+                                               device_values.begin(),
+                                               device_keys_results.begin(),
+                                               device_values_results.begin(),
+                                               queue);
+        // wait for the queue before stopping the timer, as the other
+        // benchmarks do, so all enqueued work is included in the timing
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // The generator may already advance on its first call, so the first
+    // key is not necessarily 0: count distinct keys as last - first + 1.
+    // An empty key vector (PERF_N == 0) has no keys at all.
+    const size_t expected_size = host_keys.empty()
+        ? 0
+        : static_cast<size_t>(host_keys.back() - host_keys.front() + 1);
+
+    size_t result_size = std::distance(device_keys_results.begin(), result.first);
+    if(result_size != expected_size){
+        std::cout << "ERROR: "
+                  << "wrong number of keys: got " << result_size
+                  << ", expected " << expected_size
+                  << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_reverse.cpp b/src/boost/libs/compute/perf/perf_reverse.cpp
new file mode 100644
index 00000000..64369a78
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_reverse.cpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/reverse.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for the in-place boost::compute::reverse() on PERF_N ints.
+    namespace compute = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device with its own context and command queue
+    compute::device gpu = compute::system::default_device();
+    compute::context ctx(gpu);
+    compute::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // host-side input filled by rand_int()
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side copy of the input
+    compute::vector<int> gpu_data(PERF_N, ctx);
+    compute::copy(input.begin(), input.end(), gpu_data.begin(), queue);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; run++){
+        timer.start();
+        compute::reverse(gpu_data.begin(), gpu_data.end(), queue);
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_reverse_copy.cpp b/src/boost/libs/compute/perf/perf_reverse_copy.cpp
new file mode 100644
index 00000000..5ce01c9b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_reverse_copy.cpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/reverse_copy.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::reverse_copy(), writing the reversed
+    // PERF_N ints into a second device vector.
+    namespace compute = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device with its own context and command queue
+    compute::device gpu = compute::system::default_device();
+    compute::context ctx(gpu);
+    compute::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // host-side input filled by rand_int()
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side copy of the input
+    compute::vector<int> gpu_source(PERF_N, ctx);
+    compute::copy(input.begin(), input.end(), gpu_source.begin(), queue);
+
+    // device-side destination for the reversed data
+    compute::vector<int> gpu_reversed(PERF_N, ctx);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; run++){
+        timer.start();
+        compute::reverse_copy(
+            gpu_source.begin(), gpu_source.end(), gpu_reversed.begin(), queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_rotate.cpp b/src/boost/libs/compute/perf/perf_rotate.cpp
new file mode 100644
index 00000000..49f85d57
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_rotate.cpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/rotate.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for the in-place boost::compute::rotate() on PERF_N ints,
+    // rotating around the element at index PERF_N/2.
+    namespace compute = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device with its own context and command queue
+    compute::device gpu = compute::system::default_device();
+    compute::context ctx(gpu);
+    compute::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // host-side input filled by rand_int()
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side copy of the input
+    compute::vector<int> gpu_data(PERF_N, ctx);
+    compute::copy(input.begin(), input.end(), gpu_data.begin(), queue);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; run++){
+        timer.start();
+        compute::rotate(
+            gpu_data.begin(),
+            gpu_data.begin()+(PERF_N/2),
+            gpu_data.end(),
+            queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_rotate_copy.cpp b/src/boost/libs/compute/perf/perf_rotate_copy.cpp
new file mode 100644
index 00000000..97111ef5
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_rotate_copy.cpp
@@ -0,0 +1,62 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/rotate_copy.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer obtained by scaling rand() from
+// [0, RAND_MAX] to [0.0, 25.0] and truncating the result.
+int rand_int()
+{
+    const double upper = 25.0;
+    return static_cast<int>(upper * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+int main(int argc, char *argv[])
+{
+    // Benchmark for boost::compute::rotate_copy(): rotates PERF_N ints
+    // around index PERF_N/2 into a second device vector.
+    namespace compute = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device with its own context and command queue
+    compute::device gpu = compute::system::default_device();
+    compute::context ctx(gpu);
+    compute::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // host-side input filled by rand_int()
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side copy of the input
+    compute::vector<int> gpu_source(PERF_N, ctx);
+    compute::copy(input.begin(), input.end(), gpu_source.begin(), queue);
+
+    // device-side destination for the rotated data
+    compute::vector<int> gpu_rotated(PERF_N, ctx);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; run++){
+        timer.start();
+        compute::rotate_copy(
+            gpu_source.begin(),
+            gpu_source.begin()+(PERF_N/2),
+            gpu_source.end(),
+            gpu_rotated.begin(),
+            queue
+        );
+        queue.finish();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_saxpy.cpp b/src/boost/libs/compute/perf/perf_saxpy.cpp
new file mode 100644
index 00000000..99ffc55d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_saxpy.cpp
@@ -0,0 +1,162 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <boost/compute/lambda.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/algorithm/transform.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+namespace po = boost::program_options;
+namespace compute = boost::compute;
+
+// Returns a pseudo-random float in [0, 1000]: rand() is scaled to [0, 1]
+// in single precision and multiplied by 1000.
+float rand_float()
+{ return 1000.f * (static_cast<float>(rand()) / static_cast<float>(RAND_MAX)); }
+
+// Times `result = alpha * x + y` on the device and returns perf_timer's
+// min_time() over `trials` runs. The result buffer is zeroed before each
+// timed run; the fill is not part of the measurement.
+template<class T>
+double perf_saxpy(const compute::vector<T>& x,
+                  const compute::vector<T>& y,
+                  const T alpha,
+                  const size_t trials,
+                  compute::command_queue& queue)
+{
+    using compute::lambda::_1;
+    using compute::lambda::_2;
+
+    // device-side output buffer, same length as x
+    compute::vector<T> result(x.size(), queue.get_context());
+
+    perf_timer timer;
+    for(size_t run = 0; run < trials; ++run){
+        // reset the output and drain the queue before the clock starts
+        compute::fill(result.begin(), result.end(), T(0), queue);
+        queue.finish();
+
+        timer.start();
+        compute::transform(x.begin(), x.end(), y.begin(), result.begin(), alpha * _1 + _2, queue);
+        queue.finish();
+        timer.stop();
+    }
+
+    return timer.min_time();
+}
+
+// Tries every (tpb, vpt) pair below through the device's global parameter cache
+// (key "__boost_copy_kernel_<sizeof(T)>"), timing perf_saxpy() for each, and stores
+// the fastest pair. NOTE(review): if every pair throws, 0/0 is what gets stored.
+template<class T>
+void tune_saxpy(const compute::vector<T>& x,
+                const compute::vector<T>& y,
+                const T alpha,
+                const size_t trials,
+                compute::command_queue& queue)
+{
+    const compute::uint_ tpb_candidates[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
+    const compute::uint_ vpt_candidates[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+    const std::string cache_key =
+        std::string("__boost_copy_kernel_") + boost::lexical_cast<std::string>(sizeof(T));
+    boost::shared_ptr<compute::detail::parameter_cache> cache =
+        compute::detail::parameter_cache::get_global_cache(queue.get_device());
+
+    compute::uint_ best_tpb = 0;
+    compute::uint_ best_vpt = 0;
+    double best_time = (std::numeric_limits<double>::max)();
+
+    for(size_t ti = 0; ti < sizeof(tpb_candidates) / sizeof(tpb_candidates[0]); ++ti){
+        cache->set(cache_key, "tpb", tpb_candidates[ti]);
+        for(size_t vi = 0; vi < sizeof(vpt_candidates) / sizeof(vpt_candidates[0]); ++vi){
+            cache->set(cache_key, "vpt", vpt_candidates[vi]);
+            try {
+                const double elapsed = perf_saxpy(x, y, alpha, trials, queue);
+                if(elapsed < best_time){
+                    best_time = elapsed;
+                    best_tpb = tpb_candidates[ti];
+                    best_vpt = vpt_candidates[vi];
+                }
+            }
+            catch(compute::opencl_error&){
+                // invalid parameters for this device; move on to the next pair
+            }
+        }
+    }
+
+    // leave the winning pair in the cache
+    cache->set(cache_key, "tpb", best_tpb);
+    cache->set(cache_key, "vpt", best_vpt);
+}
+
+// Entry point: parses the command line, optionally runs tune_saxpy(), then prints
+// the perf_saxpy() time. --help prints the option summary and exits early.
+int main(int argc, char *argv[])
+{
+    // setup command line arguments
+    po::options_description options("options");
+    options.add_options()
+        ("help", "show usage instructions")
+        ("size", po::value<size_t>()->default_value(8192), "input size")
+        ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
+        ("tune", "run tuning procedure")
+        ("alpha", po::value<double>()->default_value(2.5), "saxpy alpha value")
+    ;
+    po::positional_options_description positional_options;
+    positional_options.add("size", 1);
+
+    // parse command line
+    po::variables_map vm;
+    po::store(po::command_line_parser(argc, argv).options(options).positional(positional_options).run(), vm);
+    po::notify(vm);
+
+    if(vm.count("help")){
+        std::cout << options << std::endl;
+        return 0;
+    }
+
+    const size_t size = vm["size"].as<size_t>();
+    const size_t trials = vm["trials"].as<size_t>();
+    const float alpha = static_cast<float>(vm["alpha"].as<double>()); // parsed as double, used as float
+    std::cout << "size: " << size << std::endl;
+
+    // setup context and queue for the default device
+    compute::device device = boost::compute::system::default_device();
+    compute::context context(device);
+    compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vectors of random numbers on the host
+    std::vector<float> host_x(size);
+    std::vector<float> host_y(size);
+    std::generate(host_x.begin(), host_x.end(), rand_float);
+    std::generate(host_y.begin(), host_y.end(), rand_float);
+
+    // create vectors on the device and copy the data
+    compute::vector<float> x(host_x.begin(), host_x.end(), queue);
+    compute::vector<float> y(host_y.begin(), host_y.end(), queue);
+
+    // run tuning procedure (if requested)
+    if(vm.count("tune")) tune_saxpy(x, y, alpha, trials, queue);
+
+    // run benchmark
+    double t = perf_saxpy(x, y, alpha, trials, queue);
+    std::cout << "time: " << t / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_search.cpp b/src/boost/libs/compute/perf/perf_search.cpp
new file mode 100644
index 00000000..b76e9755
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_search.cpp
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/search.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::search(): looks for a fixed 6-element pattern in
+// PERF_N random ints and prints the fastest of PERF_TRIALS runs.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device, plus a context and command queue bound to it
+    bc::device gpu = bc::system::default_device();
+    bc::context ctx(gpu);
+    bc::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // haystack: random values generated on the host
+    std::vector<int> haystack(PERF_N);
+    std::generate(haystack.begin(), haystack.end(), rand_int);
+
+    // needle: the sub-sequence to look for; its length is taken from the array
+    int needle[] = {2, 6, 6, 7, 8, 4};
+    const size_t needle_len = sizeof(needle) / sizeof(needle[0]);
+
+    // upload both sequences to the device
+    bc::vector<int> device_haystack(PERF_N, ctx);
+    bc::copy(haystack.begin(), haystack.end(), device_haystack.begin(), queue);
+    bc::vector<int> device_needle(needle, needle + needle_len, queue);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        bc::search(device_haystack.begin(), device_haystack.end(),
+                   device_needle.begin(), device_needle.end(), queue);
+        queue.finish(); // block until the queued work has completed
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_search_n.cpp b/src/boost/libs/compute/perf/perf_search_n.cpp
new file mode 100644
index 00000000..31fa0adb
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_search_n.cpp
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/search_n.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::search_n(), called with the arguments 5 and 2, over
+// PERF_N random ints and prints the fastest of PERF_TRIALS runs.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device, plus a context and command queue bound to it
+    bc::device gpu = bc::system::default_device();
+    bc::context ctx(gpu);
+    bc::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // random input generated on the host
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    // device-side copy of the input
+    bc::vector<int> device_input(PERF_N, ctx);
+    bc::copy(input.begin(), input.end(), device_input.begin(), queue);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        timer.start();
+        bc::search_n(device_input.begin(), device_input.end(), 5, 2, queue);
+        queue.finish(); // block until the queued work has completed
+        timer.stop();
+    }
+
+    // the fastest run is scaled by 1e6 and labelled as milliseconds
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_set_difference.cpp b/src/boost/libs/compute/perf/perf_set_difference.cpp
new file mode 100644
index 00000000..b1b28573
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_set_difference.cpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/set_difference.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::set_difference() on two sorted random sequences
+// whose lengths add up to PERF_N; prints the fastest of PERF_TRIALS trials and
+// the length of the resulting range.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // split PERF_N into floor/ceil halves with integer arithmetic; this avoids
+    // the round trip through double and the std::floor/std::ceil calls
+    const size_t lower_half = PERF_N / 2;
+    const size_t upper_half = PERF_N - lower_half;
+
+    // create sorted vectors of random numbers on the host
+    std::vector<int> v1(lower_half);
+    std::vector<int> v2(upper_half);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // create vectors on the device (sized from the host data) and copy the data
+    boost::compute::vector<int> gpu_v1(v1.size(), context);
+    boost::compute::vector<int> gpu_v2(v2.size(), context);
+    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
+    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);
+
+    // output buffer holds PERF_N elements, as many as both inputs combined
+    boost::compute::vector<int> gpu_v3(PERF_N, context);
+    boost::compute::vector<int>::iterator gpu_v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        gpu_v3_end = boost::compute::set_difference(
+            gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue);
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_set_intersection.cpp b/src/boost/libs/compute/perf/perf_set_intersection.cpp
new file mode 100644
index 00000000..dbfeb42f
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_set_intersection.cpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/set_intersection.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::set_intersection() on two sorted random sequences
+// whose lengths add up to PERF_N; prints the fastest of PERF_TRIALS trials and
+// the length of the resulting range.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // split PERF_N into floor/ceil halves with integer arithmetic; this avoids
+    // the round trip through double and the std::floor/std::ceil calls
+    const size_t lower_half = PERF_N / 2;
+    const size_t upper_half = PERF_N - lower_half;
+
+    // create sorted vectors of random numbers on the host
+    std::vector<int> v1(lower_half);
+    std::vector<int> v2(upper_half);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // create vectors on the device (sized from the host data) and copy the data
+    boost::compute::vector<int> gpu_v1(v1.size(), context);
+    boost::compute::vector<int> gpu_v2(v2.size(), context);
+    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
+    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);
+
+    // output buffer holds PERF_N elements, as many as both inputs combined
+    boost::compute::vector<int> gpu_v3(PERF_N, context);
+    boost::compute::vector<int>::iterator gpu_v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        gpu_v3_end = boost::compute::set_intersection(
+            gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue);
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp b/src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp
new file mode 100644
index 00000000..9449c585
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_set_symmetric_difference.cpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/set_symmetric_difference.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::set_symmetric_difference() on two sorted random
+// sequences whose lengths add up to PERF_N; prints the fastest of PERF_TRIALS
+// trials and the length of the resulting range.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // split PERF_N into floor/ceil halves with integer arithmetic; this avoids
+    // the round trip through double and the std::floor/std::ceil calls
+    const size_t lower_half = PERF_N / 2;
+    const size_t upper_half = PERF_N - lower_half;
+
+    // create sorted vectors of random numbers on the host
+    std::vector<int> v1(lower_half);
+    std::vector<int> v2(upper_half);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // create vectors on the device (sized from the host data) and copy the data
+    boost::compute::vector<int> gpu_v1(v1.size(), context);
+    boost::compute::vector<int> gpu_v2(v2.size(), context);
+    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
+    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);
+
+    // output buffer holds PERF_N elements, as many as both inputs combined
+    boost::compute::vector<int> gpu_v3(PERF_N, context);
+    boost::compute::vector<int>::iterator gpu_v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        gpu_v3_end = boost::compute::set_symmetric_difference(
+            gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue);
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_set_union.cpp b/src/boost/libs/compute/perf/perf_set_union.cpp
new file mode 100644
index 00000000..3a336cb6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_set_union.cpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/set_union.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::set_union() on two sorted random sequences whose
+// lengths add up to PERF_N; prints the fastest of PERF_TRIALS trials and the
+// length of the resulting range.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // split PERF_N into floor/ceil halves with integer arithmetic; this avoids
+    // the round trip through double and the std::floor/std::ceil calls
+    const size_t lower_half = PERF_N / 2;
+    const size_t upper_half = PERF_N - lower_half;
+
+    // create sorted vectors of random numbers on the host
+    std::vector<int> v1(lower_half);
+    std::vector<int> v2(upper_half);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // create vectors on the device (sized from the host data) and copy the data
+    boost::compute::vector<int> gpu_v1(v1.size(), context);
+    boost::compute::vector<int> gpu_v2(v2.size(), context);
+    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
+    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);
+
+    // output buffer holds PERF_N elements, as many as both inputs combined
+    boost::compute::vector<int> gpu_v3(PERF_N, context);
+    boost::compute::vector<int>::iterator gpu_v3_end;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        gpu_v3_end = boost::compute::set_union(
+            gpu_v1.begin(), gpu_v1.end(), gpu_v2.begin(), gpu_v2.end(), gpu_v3.begin(), queue);
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_sort.cpp b/src/boost/libs/compute/perf/perf_sort.cpp
new file mode 100644
index 00000000..458203ed
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_sort.cpp
@@ -0,0 +1,130 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/algorithm/is_sorted.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+namespace po = boost::program_options;
+namespace compute = boost::compute;
+
+// Sorts a fresh device copy of `data` `trials` times; returns perf_timer's min_time().
+// A failed is_sorted() check is only reported on std::cerr.
+template<class T>
+double perf_sort(const std::vector<T>& data,
+                 const size_t trials,
+                 compute::command_queue& queue)
+{
+    compute::vector<T> device_data(data.size(), queue.get_context());
+    perf_timer timer;
+    for(size_t run = 0; run < trials; ++run){
+        // re-upload the unsorted input; the upload is not timed
+        compute::copy(data.begin(), data.end(), device_data.begin(), queue);
+        timer.start();
+        compute::sort(device_data.begin(), device_data.end(), queue);
+        queue.finish();
+        timer.stop();
+        const bool sorted = compute::is_sorted(device_data.begin(), device_data.end(), queue);
+        if(!sorted) std::cerr << "ERROR: is_sorted() returned false" << std::endl;
+    }
+    return timer.min_time();
+}
+
+// Tries each "tpb" value below through the device's global parameter cache
+// (key "__boost_radix_sort_<type name>"), timing perf_sort() for each, and stores
+// the fastest one. NOTE(review): if every value throws, 0 is what gets stored.
+template<class T>
+void tune_sort(const std::vector<T>& data,
+               const size_t trials,
+               compute::command_queue& queue)
+{
+    const compute::uint_ candidates[] = { 32, 64, 128, 256, 512, 1024 };
+    const std::string cache_key =
+        std::string("__boost_radix_sort_") + compute::type_name<T>();
+    boost::shared_ptr<compute::detail::parameter_cache> cache =
+        compute::detail::parameter_cache::get_global_cache(queue.get_device());
+
+    compute::uint_ best_tpb = 0;
+    double best_time = (std::numeric_limits<double>::max)();
+
+    for(size_t idx = 0; idx < sizeof(candidates) / sizeof(candidates[0]); ++idx){
+        cache->set(cache_key, "tpb", candidates[idx]);
+        try {
+            const double elapsed = perf_sort(data, trials, queue);
+            if(elapsed < best_time){
+                best_time = elapsed;
+                best_tpb = candidates[idx];
+            }
+        }
+        catch(compute::opencl_error&){
+            // invalid work group size for this device; move on to the next one
+        }
+    }
+
+    // leave the winning value in the cache
+    cache->set(cache_key, "tpb", best_tpb);
+}
+
+// Entry point: parses the command line, optionally runs tune_sort(), then prints
+// the perf_sort() time. --help prints the option summary and exits early.
+int main(int argc, char *argv[])
+{
+    // setup command line arguments
+    po::options_description options("options");
+    options.add_options()
+        ("help", "show usage instructions")
+        ("size", po::value<size_t>()->default_value(8192), "input size")
+        ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
+        ("tune", "run tuning procedure")
+    ;
+    po::positional_options_description positional_options;
+    positional_options.add("size", 1);
+
+    // parse command line
+    po::variables_map vm;
+    po::store(po::command_line_parser(argc, argv).options(options).positional(positional_options).run(), vm);
+    po::notify(vm);
+
+    if(vm.count("help")){
+        std::cout << options << std::endl;
+        return 0;
+    }
+
+    const size_t size = vm["size"].as<size_t>();
+    const size_t trials = vm["trials"].as<size_t>();
+    std::cout << "size: " << size << std::endl;
+
+    // setup context and queue for the default device
+    compute::device device = boost::compute::system::default_device();
+    compute::context context(device);
+    compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<unsigned int> data(size);
+    std::generate(data.begin(), data.end(), rand);
+
+    // run tuning procedure (if requested)
+    if(vm.count("tune")) tune_sort(data, trials, queue);
+
+    // run sort benchmark
+    double t = perf_sort(data, trials, queue);
+    std::cout << "time: " << t / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_sort_by_key.cpp b/src/boost/libs/compute/perf/perf_sort_by_key.cpp
new file mode 100644
index 00000000..57c3fc83
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_sort_by_key.cpp
@@ -0,0 +1,79 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/sort_by_key.hpp>
+#include <boost/compute/algorithm/is_sorted.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+#include "perf.hpp"
+
+// Benchmarks boost::compute::sort_by_key() on PERF_N random int_ keys whose
+// long_ values start out equal to the keys. Prints the fastest of PERF_TRIALS
+// runs, then returns -1 if either the keys or the values are not sorted.
+int main(int argc, char *argv[])
+{
+    namespace bc = boost::compute;
+    typedef bc::int_ key_type;
+    typedef bc::long_ value_type;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // default device, plus a context and command queue bound to it
+    bc::device gpu = bc::system::default_device();
+    bc::context ctx(gpu);
+    bc::command_queue queue(ctx, gpu);
+    std::cout << "device: " << gpu.name() << std::endl;
+
+    // random keys on the host; each value is a copy of its key
+    std::vector<key_type> keys(PERF_N);
+    std::generate(keys.begin(), keys.end(), rand);
+    std::vector<value_type> values(keys.begin(), keys.end());
+
+    // device buffers; refilled from the host before every run
+    bc::vector<key_type> device_keys(PERF_N, ctx);
+    bc::vector<value_type> device_values(PERF_N, ctx);
+
+    perf_timer timer;
+    for(size_t run = 0; run < PERF_TRIALS; ++run){
+        // restore the unsorted input (not timed)
+        bc::copy(keys.begin(), keys.end(), device_keys.begin(), queue);
+        bc::copy(values.begin(), values.end(), device_values.begin(), queue);
+
+        // time only the sort itself, including the wait for completion
+        timer.start();
+        bc::sort_by_key(device_keys.begin(), device_keys.end(), device_values.begin(), queue);
+        queue.finish();
+        timer.stop();
+    }
+
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    // verify keys are sorted
+    const bool keys_sorted = bc::is_sorted(device_keys.begin(), device_keys.end(), queue);
+    if(!keys_sorted){
+        std::cout << "ERROR: is_sorted() returned false for the keys" << std::endl;
+        return -1;
+    }
+    // verify values are sorted (they started out equal to the keys)
+    const bool values_sorted = bc::is_sorted(device_values.begin(), device_values.end(), queue);
+    if(!values_sorted){
+        std::cout << "ERROR: is_sorted() returned false for the values" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_sort_float.cpp b/src/boost/libs/compute/perf/perf_sort_float.cpp
new file mode 100644
index 00000000..1b2d5f5e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_sort_float.cpp
@@ -0,0 +1,72 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/is_sorted.hpp>
+#include <boost/compute/algorithm/sort.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random float in [-50000, 50000]: rand() is scaled to
+// [0, 1] in single precision, shifted by -0.5 and multiplied by 100000.
+float rand_float()
+{ return 100000.0f * ((static_cast<float>(rand()) / static_cast<float>(RAND_MAX)) - 0.5f); }
+
+// Benchmarks boost::compute::sort() on PERF_N random floats. Runs PERF_TRIALS
+// trials, re-uploading the unsorted input before each one, and prints the
+// fastest (min_time) instead of a single measurement. Returns -1 if the
+// device vector is not sorted afterwards.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<float> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_float);
+
+    // create vector on the device; it is filled inside the trial loop
+    boost::compute::vector<float> device_vector(PERF_N, context);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // restore the unsorted input (not timed) so every trial sorts the same data
+        boost::compute::copy(
+            host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+        );
+
+        t.start();
+        boost::compute::sort(device_vector.begin(), device_vector.end(), queue);
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // verify vector is sorted
+    if(!boost::compute::is_sorted(device_vector.begin(),
+                                  device_vector.end(),
+                                  queue)){
+        std::cout << "ERROR: is_sorted() returned false" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stable_partition.cpp b/src/boost/libs/compute/perf/perf_stable_partition.cpp
new file mode 100644
index 00000000..f7ef1063
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stable_partition.cpp
@@ -0,0 +1,62 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/stable_partition.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random int in [0, 25]: rand() is scaled to [0, 1],
+// multiplied by 25 and truncated, so 25 appears only when rand() == RAND_MAX.
+int rand_int()
+{ return static_cast<int>(25.0 * (static_cast<double>(rand()) / RAND_MAX)); }
+
+// Benchmarks boost::compute::stable_partition() (predicate: value < 10) on
+// PERF_N random ints and prints the fastest of PERF_TRIALS trials. The input
+// is re-uploaded before each trial so no trial sees already-partitioned data.
+int main(int argc, char *argv[])
+{
+    using boost::compute::_1;
+
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of random numbers on the host
+    std::vector<int> host_vector(PERF_N);
+    std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+    // create vector on the device; it is filled inside the trial loop
+    boost::compute::vector<int> device_vector(PERF_N, context);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // restore the original order (not timed): the partition reorders the range in place
+        boost::compute::copy(host_vector.begin(), host_vector.end(), device_vector.begin(), queue);
+        t.start();
+        boost::compute::stable_partition(device_vector.begin(), device_vector.end(), _1 < 10, queue);
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_accumulate.cpp b/src/boost/libs/compute/perf/perf_stl_accumulate.cpp
new file mode 100644
index 00000000..c28d2d03
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_accumulate.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::accumulate over PERF_N random ints on the host.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ int sum = 0;
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0)); // the only statement between start() and stop()
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+ std::cout << "sum: " << sum << std::endl; // result of the last trial; printing it means the timed computation is actually used
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_count.cpp b/src/boost/libs/compute/perf/perf_stl_count.cpp
new file mode 100644
index 00000000..9df13f86
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_count.cpp
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::count of the value 4 in PERF_N random ints on the host.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ // count values equal to four in the vector
+ size_t count = 0;
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ count = std::count( // the only statement between start() and stop()
+ host_vector.begin(), host_vector.end(), 4
+ );
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+ std::cout << "count: " << count << std::endl; // result of the last trial
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_find.cpp b/src/boost/libs/compute/perf/perf_stl_find.cpp
new file mode 100644
index 00000000..b5d3eed2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_find.cpp
@@ -0,0 +1,58 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "perf.hpp"
+
+// Max integer that can be generated by rand_int() function (reached only when rand() == RAND_MAX).
+int rand_int_max = 25;
+
+int rand_int() // Returns a pseudo-random int in [0, rand_int_max] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * rand_int_max); // scale rand() to [0.0, rand_int_max], then truncate
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::find for a value that is never present (full scan of PERF_N ints).
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ // trying to find element that isn't in vector (worst-case scenario)
+ int wanted = rand_int_max + 1; // one above the largest value rand_int() can return
+
+ // result
+ std::vector<int>::iterator host_result_it;
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted);
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ // verify: the search must have failed, i.e. returned end()
+ if(host_result_it != host_vector.end()){
+ std::cout << "ERROR: "
+ << "host_result_iterator != "
+ << "host_vector.end()"
+ << std::endl;
+ return -1; // non-zero exit status signals the failed check
+ }
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_find_end.cpp b/src/boost/libs/compute/perf/perf_stl_find_end.cpp
new file mode 100644
index 00000000..cb1233b1
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_find_end.cpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::find_end of a fixed 6-element pattern in PERF_N random ints.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ int pattern[] = {2, 6, 6, 7, 8, 4}; // subsequence to look for; every value is within rand_int()'s range
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::find_end(host_vector.begin(), host_vector.end(), // NOTE(review): result is discarded; an optimizing compiler might drop the call -- consider consuming it
+ pattern, pattern + 6);
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_includes.cpp b/src/boost/libs/compute/perf/perf_stl_includes.cpp
new file mode 100644
index 00000000..b71dbf3c
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_includes.cpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::includes on two identical sorted ranges of PERF_N ints.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+
+ std::cout << "size: " << PERF_N << std::endl;
+
+ std::vector<int> v1(PERF_N);
+ std::generate(v1.begin(), v1.end(), rand_int);
+
+ std::vector<int> v2(v1); // exact copy, so v2 is trivially a subset of v1
+
+ std::sort(v1.begin(), v1.end()); // std::includes requires both ranges to be sorted
+ std::sort(v2.begin(), v2.end());
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::includes( // NOTE(review): result is discarded; an optimizing compiler might drop the call -- consider consuming it
+ v1.begin(), v1.end(),
+ v2.begin(), v2.end()
+ );
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_inner_product.cpp b/src/boost/libs/compute/perf/perf_stl_inner_product.cpp
new file mode 100644
index 00000000..884f06c9
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_inner_product.cpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::inner_product of two vectors of PERF_N random ints.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ std::vector<int> h1(PERF_N);
+ std::vector<int> h2(PERF_N);
+ std::generate(h1.begin(), h1.end(), rand_int);
+ std::generate(h2.begin(), h2.end(), rand_int);
+
+ int product = 0; // NOTE(review): each term can be up to 25*25, so an int accumulator may overflow for very large PERF_N -- confirm intended sizes
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ product = std::inner_product( // the only statement between start() and stop()
+ h1.begin(), h1.end(), h2.begin(), int(0)
+ );
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+ std::cout << "product: " << product << std::endl; // result of the last trial
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_is_permutation.cpp b/src/boost/libs/compute/perf/perf_stl_is_permutation.cpp
new file mode 100644
index 00000000..1384e8e9
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_is_permutation.cpp
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::is_permutation of a random vector against its own reversal.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ std::vector<int> host_vector2(PERF_N);
+ std::copy(host_vector.rbegin(), host_vector.rend(), host_vector2.begin()); // host_vector2 = host_vector reversed, i.e. a true permutation
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::is_permutation(host_vector.begin(), host_vector.end(), // NOTE(review): result is discarded; an optimizing compiler might drop the call -- consider consuming it
+ host_vector2.begin());
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_max_element.cpp b/src/boost/libs/compute/perf/perf_stl_max_element.cpp
new file mode 100644
index 00000000..3fa61267
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_max_element.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Rastko Anicic <anicic.rastko@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 9999999] (further bounded by RAND_MAX).
+{
+ return static_cast<int>(rand() % 10000000); // rand() already yields int, so the cast does not change the value
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::max_element over PERF_N random ints on the host.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ int max = 0;
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ max = *(std::max_element(host_vector.begin(), host_vector.end())); // NOTE(review): dereferences end() if PERF_N is 0 -- confirm sizes are always positive
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+ std::cout << "max: " << max << std::endl; // result of the last trial
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_merge.cpp b/src/boost/libs/compute/perf/perf_stl_merge.cpp
new file mode 100644
index 00000000..0a842a04
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_merge.cpp
@@ -0,0 +1,38 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // Benchmark: std::merge of two sorted halves into a PERF_N-element output vector.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+
+ std::cout << "size: " << PERF_N << std::endl;
+ std::vector<int> v1 = generate_random_vector<int>(PERF_N / 2); // lower half; integer division replaces std::floor(PERF_N / 2.0)
+ std::vector<int> v2 = generate_random_vector<int>(PERF_N - PERF_N / 2); // remainder, equal to std::ceil(PERF_N / 2.0), so sizes sum to PERF_N
+ std::vector<int> v3(PERF_N); // merge destination, sized to hold both inputs
+
+ std::sort(v1.begin(), v1.end()); // std::merge requires sorted inputs
+ std::sort(v2.begin(), v2.end());
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_next_permutation.cpp b/src/boost/libs/compute/perf/perf_stl_next_permutation.cpp
new file mode 100644
index 00000000..22148975
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_next_permutation.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::next_permutation starting from a descending-sorted vector.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+ std::sort(host_vector.begin(), host_vector.end(), std::greater<int>()); // descending order is the last permutation, so next_permutation must wrap around
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::next_permutation(host_vector.begin(), host_vector.end());
+ t.stop();
+ std::prev_permutation(host_vector.begin(), host_vector.end()); // untimed: undo the step so every trial starts from the same descending input
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_partial_sum.cpp b/src/boost/libs/compute/perf/perf_stl_partial_sum.cpp
new file mode 100644
index 00000000..533defb2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_partial_sum.cpp
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::partial_sum over PERF_N random values on the host.
+{
+ using boost::compute::int_; // element type shared with the Boost.Compute benchmarks
+
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // allocate input (v) and output (r) vectors on the host; v is filled with random numbers before each trial
+ std::vector<int_> v(PERF_N);
+ std::vector<int_> r(PERF_N);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ std::generate(v.begin(), v.end(), rand_int); // untimed: fresh input for every trial
+ t.start();
+ std::partial_sum(
+ v.begin(),
+ v.end(),
+ r.begin()
+ );
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_partition.cpp b/src/boost/libs/compute/perf/perf_stl_partition.cpp
new file mode 100644
index 00000000..56aadd0d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_partition.cpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+bool less_than_10(int value) // Partition predicate: true for values strictly below 10.
+{
+ return value < 10;
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::partition of PERF_N random ints with predicate less_than_10.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // input vector on the host; refilled with random numbers before each trial
+ std::vector<int> host_vector(PERF_N);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ std::generate(host_vector.begin(), host_vector.end(), rand_int); // untimed: std::partition reorders in place, so reusing the data would time already-partitioned input
+ t.start();
+ std::partition(host_vector.begin(), host_vector.end(), less_than_10);
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_partition_point.cpp b/src/boost/libs/compute/perf/perf_stl_partition_point.cpp
new file mode 100644
index 00000000..94b1c263
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_partition_point.cpp
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+bool less_than_20(int value) // Partition predicate: true for values strictly below 20.
+{
+ return value < 20;
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::partition_point on a vector pre-partitioned by less_than_20.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+ std::partition(host_vector.begin(), host_vector.end(), // untimed setup: partition_point requires an already-partitioned range
+ less_than_20);
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::partition_point(host_vector.begin(), host_vector.end(), // NOTE(review): result is discarded; an optimizing compiler might drop the call -- consider consuming it
+ less_than_20);
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp b/src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp
new file mode 100644
index 00000000..f246ba2a
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_prev_permutation.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::prev_permutation starting from an ascending-sorted vector.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+ std::sort(host_vector.begin(), host_vector.end()); // ascending order is the first permutation, so prev_permutation must wrap around
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::prev_permutation(host_vector.begin(), host_vector.end());
+ t.stop();
+ std::next_permutation(host_vector.begin(), host_vector.end()); // untimed: undo the step so every trial starts from the same ascending input
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_reverse.cpp b/src/boost/libs/compute/perf/perf_stl_reverse.cpp
new file mode 100644
index 00000000..b2ee2ef6
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_reverse.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: in-place std::reverse of PERF_N random ints on the host.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::reverse(host_vector.begin(), host_vector.end()); // each trial reverses the previous trial's output
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp b/src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp
new file mode 100644
index 00000000..1397e9a7
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_reverse_copy.cpp
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::reverse_copy of PERF_N random ints into a second host vector.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ // create vector for reversed data
+ std::vector<int> host_reversed_vector(PERF_N);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::reverse_copy(host_vector.begin(), host_vector.end(), // source is left untouched, so every trial does identical work
+ host_reversed_vector.begin());
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_rotate.cpp b/src/boost/libs/compute/perf/perf_stl_rotate.cpp
new file mode 100644
index 00000000..f90acef2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_rotate.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: in-place std::rotate of PERF_N random ints around the midpoint.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::rotate(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end()); // element at index PERF_N/2 becomes the new first element
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp b/src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp
new file mode 100644
index 00000000..516d11c2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_rotate_copy.cpp
@@ -0,0 +1,43 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::rotate_copy of PERF_N random ints around the midpoint into a second vector.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ std::vector<int> host_vector2(PERF_N); // destination for the rotated copy
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::rotate_copy(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end(), host_vector2.begin()); // source is left untouched, so every trial does identical work
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_saxpy.cpp b/src/boost/libs/compute/perf/perf_stl_saxpy.cpp
new file mode 100644
index 00000000..8ab33535
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_saxpy.cpp
@@ -0,0 +1,52 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include "perf.hpp"
+
+float rand_float() // Returns a pseudo-random float in [0, 1000] derived from rand().
+{
+ return (float(rand()) / float(RAND_MAX)) * 1000.f; // normalise rand() to [0, 1] in single precision, then scale
+}
+
+// y <- alpha * x + y, computed element-wise for the first n entries of x and y; y is updated in place.
+void serial_saxpy(size_t n, float alpha, const float *x, float *y)
+{
+ for(size_t i = 0; i < n; i++){
+ y[i] = alpha * x[i] + y[i]; // reads x[i] and the old y[i], overwrites y[i]
+ }
+}
+
+int main(int argc, char *argv[]) // Benchmark: serial SAXPY (y <- alpha * x + y) over PERF_N random floats on the host.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+
+ std::cout << "size: " << PERF_N << std::endl;
+
+ float alpha = 2.5f; // scale factor applied to x
+
+ std::vector<float> host_x(PERF_N);
+ std::vector<float> host_y(PERF_N);
+ std::generate(host_x.begin(), host_x.end(), rand_float);
+ std::generate(host_y.begin(), host_y.end(), rand_float);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ serial_saxpy(PERF_N, alpha, &host_x[0], &host_y[0]); // host_y accumulates across trials; NOTE(review): &v[0] is invalid if PERF_N is 0 -- confirm sizes are always positive
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_search.cpp b/src/boost/libs/compute/perf/perf_stl_search.cpp
new file mode 100644
index 00000000..8166d35e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_search.cpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::search for a fixed 6-element pattern in PERF_N random ints.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ int pattern[] = {2, 6, 6, 7, 8, 4}; // subsequence to look for; every value is within rand_int()'s range
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::search(host_vector.begin(), host_vector.end(), // NOTE(review): result is discarded; an optimizing compiler might drop the call -- consider consuming it
+ pattern, pattern + 6);
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_search_n.cpp b/src/boost/libs/compute/perf/perf_stl_search_n.cpp
new file mode 100644
index 00000000..76a6bb07
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_search_n.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+int rand_int() // Returns a pseudo-random int in [0, 25] derived from rand().
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0.0, 25.0], then truncate; 25 only when rand() == RAND_MAX
+}
+
+int main(int argc, char *argv[]) // Benchmark: std::search_n for five consecutive 2s in PERF_N random ints.
+{
+ perf_parse_args(argc, argv); // defined in perf.hpp (not shown); presumably applies command-line overrides for PERF_N / PERF_TRIALS -- confirm
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ std::search_n(host_vector.begin(), host_vector.end(), 5, 2); // count = 5, value = 2; NOTE(review): result is discarded, so an optimizing compiler might drop the call
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl; // min_time() presumably reports nanoseconds (hence / 1e6 for ms) -- confirm in perf.hpp
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_difference.cpp b/src/boost/libs/compute/perf/perf_stl_set_difference.cpp
new file mode 100644
index 00000000..c5d0802d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_difference.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks std::set_difference on two sorted halves of PERF_N random ints.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // Split PERF_N with integer arithmetic: std::floor/std::ceil yield a
+    // double size and need <cmath>, which this file does not include itself.
+    std::vector<int> v1(PERF_N / 2);
+    std::vector<int> v2(PERF_N - PERF_N / 2);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted inputs
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // v3_end starts at v3.begin() so it is valid even if no trial runs
+    std::vector<int> v3(PERF_N);
+    std::vector<int>::iterator v3_end = v3.begin();
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_difference(
+            v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_intersection.cpp b/src/boost/libs/compute/perf/perf_stl_set_intersection.cpp
new file mode 100644
index 00000000..6aa3493b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_intersection.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks std::set_intersection on two sorted halves of PERF_N random ints.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // Split PERF_N with integer arithmetic: std::floor/std::ceil yield a
+    // double size and need <cmath>, which this file does not include itself.
+    std::vector<int> v1(PERF_N / 2);
+    std::vector<int> v2(PERF_N - PERF_N / 2);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted inputs
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // v3_end starts at v3.begin() so it is valid even if no trial runs
+    std::vector<int> v3(PERF_N);
+    std::vector<int>::iterator v3_end = v3.begin();
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_intersection(
+            v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp b/src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp
new file mode 100644
index 00000000..c22c70a7
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_symmetric_difference.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks std::set_symmetric_difference on two sorted halves of PERF_N ints.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // Split PERF_N with integer arithmetic: std::floor/std::ceil yield a
+    // double size and need <cmath>, which this file does not include itself.
+    std::vector<int> v1(PERF_N / 2);
+    std::vector<int> v2(PERF_N - PERF_N / 2);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted inputs
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // v3_end starts at v3.begin() so it is valid even if no trial runs
+    std::vector<int> v3(PERF_N);
+    std::vector<int>::iterator v3_end = v3.begin();
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_symmetric_difference(
+            v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_set_union.cpp b/src/boost/libs/compute/perf/perf_stl_set_union.cpp
new file mode 100644
index 00000000..9d4fe3a8
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_set_union.cpp
@@ -0,0 +1,54 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks std::set_union on two sorted halves of PERF_N random ints.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // Split PERF_N with integer arithmetic: std::floor/std::ceil yield a
+    // double size and need <cmath>, which this file does not include itself.
+    std::vector<int> v1(PERF_N / 2);
+    std::vector<int> v2(PERF_N - PERF_N / 2);
+    std::generate(v1.begin(), v1.end(), rand_int);
+    std::generate(v2.begin(), v2.end(), rand_int);
+
+    // the set algorithms expect sorted inputs
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // v3_end starts at v3.begin() so it is valid even if no trial runs
+    std::vector<int> v3(PERF_N);
+    std::vector<int>::iterator v3_end = v3.begin();
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        v3_end = std::set_union(
+            v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin()
+        );
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_sort.cpp b/src/boost/libs/compute/perf/perf_stl_sort.cpp
new file mode 100644
index 00000000..c9d4294b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_sort.cpp
@@ -0,0 +1,33 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+
+#include "perf.hpp"
+
+// Benchmarks std::sort on a freshly generated random vector per trial.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    std::vector<int> data;
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        // untimed refill, replacing the previous trial's sorted contents
+        data = generate_random_vector<int>(PERF_N);
+
+        timer.start();
+        std::sort(data.begin(), data.end());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_stable_partition.cpp b/src/boost/libs/compute/perf/perf_stl_stable_partition.cpp
new file mode 100644
index 00000000..ee4993cc
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_stable_partition.cpp
@@ -0,0 +1,47 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Predicate: true for values strictly below 10.
+bool less_than_10(int value) {
+    return !(value >= 10);
+}
+
+// Benchmarks std::stable_partition with the less_than_10 predicate.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    std::vector<int> host_vector(PERF_N);
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        // Refill (untimed) before every trial: the partition runs in place, so
+        // later trials would otherwise be timed on already-partitioned data.
+        std::generate(host_vector.begin(), host_vector.end(), rand_int);
+        t.start();
+        std::stable_partition(host_vector.begin(), host_vector.end(),
+                              less_than_10);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_unique.cpp b/src/boost/libs/compute/perf/perf_stl_unique.cpp
new file mode 100644
index 00000000..f4f97b4b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_unique.cpp
@@ -0,0 +1,41 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks std::unique on PERF_N random integers, refilled every trial.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // host-side buffer; its contents are produced inside the trial loop
+    std::vector<int> values(PERF_N);
+    perf_timer timer;
+
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        // std::unique rewrites the range, so regenerate it (untimed) first
+        std::generate(values.begin(), values.end(), rand_int);
+        timer.start();
+        std::unique(values.begin(), values.end());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_stl_unique_copy.cpp b/src/boost/libs/compute/perf/perf_stl_unique_copy.cpp
new file mode 100644
index 00000000..77705fa2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_stl_unique_copy.cpp
@@ -0,0 +1,44 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks std::unique_copy from one host vector into another.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // source and destination buffers, both PERF_N elements long
+    std::vector<int> src(PERF_N);
+    std::vector<int> dst(PERF_N);
+
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        // new random input for each trial, generated outside the timed region
+        std::generate(src.begin(), src.end(), rand_int);
+
+        timer.start();
+        std::unique_copy(src.begin(), src.end(), dst.begin());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_tbb_accumulate.cpp b/src/boost/libs/compute/perf/perf_tbb_accumulate.cpp
new file mode 100644
index 00000000..319ad8af
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_tbb_accumulate.cpp
@@ -0,0 +1,75 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <tbb/blocked_range.h>
+#include <tbb/parallel_reduce.h>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Body for tbb::parallel_reduce: sums the elements of a blocked_range<T*>.
+template<class T>
+struct Sum {
+    T value; // total of every sub-range this body has processed
+    Sum() : value(0) {}
+    Sum( Sum&, tbb::split ) : value(0) {} // split-off body starts at zero
+    void operator()( const tbb::blocked_range<T*>& r ) {
+        T partial = value;
+        for( T* p = r.begin(); p != r.end(); ++p ) partial += *p;
+        value = partial;
+    }
+    // adds the total of another body into this one
+    void join( Sum& rhs ) { value += rhs.value; }
+};
+
+// Sums array[0..n) with tbb::parallel_reduce and the Sum<T> body.
+template<class T>
+T ParallelSum( T array[], size_t n ) {
+    Sum<T> reducer;
+    tbb::parallel_reduce( tbb::blocked_range<T*>( array, array + n ), reducer );
+    return reducer.value;
+}
+
+// Benchmarks ParallelSum and checks its result against std::accumulate.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // host-side input: PERF_N pseudo-random integers from rand_int()
+    std::vector<int> input(PERF_N);
+    std::generate(input.begin(), input.end(), rand_int);
+
+    int parallel_total = 0;
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        timer.start();
+        parallel_total = ParallelSum<int>(&input[0], input.size());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "sum: " << parallel_total << std::endl;
+
+    // serial reference value; a mismatch makes the benchmark fail
+    const int expected = std::accumulate(input.begin(), input.end(), int(0));
+    if(parallel_total == expected){
+        return 0;
+    }
+    std::cerr << "ERROR: sum (" << parallel_total << ") != (" << expected << ")" << std::endl;
+    return -1;
+}
diff --git a/src/boost/libs/compute/perf/perf_tbb_merge.cpp b/src/boost/libs/compute/perf/perf_tbb_merge.cpp
new file mode 100644
index 00000000..a7aa814e
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_tbb_merge.cpp
@@ -0,0 +1,95 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <tbb/parallel_for.h>
+
+#include "perf.hpp"
+
+// example from: http://www.threadingbuildingblocks.org/docs/help/reference/algorithms/parallel_for_func.htm
+using namespace tbb;
+
+template<typename Iterator>
+struct ParallelMergeRange { // splittable range describing one merge task for parallel_for
+ static size_t grainsize; // threshold used by is_divisible() on the shorter input's length
+ Iterator begin1, end1; // [begin1,end1) is 1st sequence to be merged
+ Iterator begin2, end2; // [begin2,end2) is 2nd sequence to be merged
+ Iterator out; // where to put merged sequence
+ bool empty() const {return (end1-begin1)+(end2-begin2)==0;} // true when both inputs are empty
+ bool is_divisible() const {
+ return (std::min)( end1-begin1, end2-begin2 ) > grainsize; // NOTE(review): iterator difference (typically signed) compared with size_t
+ }
+ ParallelMergeRange( ParallelMergeRange& r, split ) { // splitting constructor: r keeps the lower part, *this takes the upper part
+ if( r.end1-r.begin1 < r.end2-r.begin2 ) { // make sequence 1 of r the longer one
+ std::swap(r.begin1,r.begin2);
+ std::swap(r.end1,r.end2);
+ }
+ Iterator m1 = r.begin1 + (r.end1-r.begin1)/2; // midpoint of the longer sequence
+ Iterator m2 = std::lower_bound( r.begin2, r.end2, *m1 ); // first element of sequence 2 that is not less than *m1
+ begin1 = m1;
+ begin2 = m2;
+ end1 = r.end1;
+ end2 = r.end2;
+ out = r.out + (m1-r.begin1) + (m2-r.begin2); // skip the output slots for the elements r keeps
+ r.end1 = m1;
+ r.end2 = m2;
+ }
+ ParallelMergeRange( Iterator begin1_, Iterator end1_, // plain constructor: stores the five iterators as given
+ Iterator begin2_, Iterator end2_,
+ Iterator out_ ) :
+ begin1(begin1_), end1(end1_),
+ begin2(begin2_), end2(end2_), out(out_)
+ {}
+};
+
+template<typename Iterator>
+size_t ParallelMergeRange<Iterator>::grainsize = 1000; // initial value of the is_divisible() threshold
+
+// parallel_for body: serially merges the two inputs of one leaf range.
+template<typename Iterator>
+struct ParallelMergeBody {
+    void operator()( ParallelMergeRange<Iterator>& leaf ) const
+    { std::merge( leaf.begin1, leaf.end1, leaf.begin2, leaf.end2, leaf.out ); }
+};
+
+// Merges [begin1,end1) and [begin2,end2) into out via tbb::parallel_for.
+template<typename Iterator>
+void ParallelMerge( Iterator begin1, Iterator end1, Iterator begin2, Iterator end2, Iterator out ) {
+    ParallelMergeRange<Iterator> whole(begin1, end1, begin2, end2, out);
+    ParallelMergeBody<Iterator> body;
+    // simple_partitioner: keeps splitting for as long as is_divisible() is true
+    parallel_for( whole, body, simple_partitioner() );
+}
+
+// Benchmarks ParallelMerge on two sorted vectors of PERF_N / 2 random ints.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // inputs and the destination buffer for the merged result
+    std::vector<int> lhs = generate_random_vector<int>(PERF_N / 2);
+    std::vector<int> rhs = generate_random_vector<int>(PERF_N / 2);
+    std::vector<int> merged(PERF_N);
+
+    // merging needs both inputs sorted
+    std::sort(lhs.begin(), lhs.end());
+    std::sort(rhs.begin(), rhs.end());
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        timer.start();
+        ParallelMerge(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), merged.begin());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_tbb_sort.cpp b/src/boost/libs/compute/perf/perf_tbb_sort.cpp
new file mode 100644
index 00000000..2f79b5b2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_tbb_sort.cpp
@@ -0,0 +1,35 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <vector>
+
+#include <tbb/parallel_sort.h>
+
+#include "perf.hpp"
+
+// Benchmarks tbb::parallel_sort on new random data for every trial.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    std::vector<int> data(PERF_N);
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        // untimed refill, replacing the previous trial's sorted contents
+        data = generate_random_vector<int>(PERF_N);
+
+        timer.start();
+        tbb::parallel_sort(data.begin(), data.end());
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_accumulate.cu b/src/boost/libs/compute/perf/perf_thrust_accumulate.cu
new file mode 100644
index 00000000..76b72321
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_accumulate.cu
@@ -0,0 +1,45 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/reduce.h>
+
+#include "perf.hpp"
+
+// Benchmarks thrust::reduce over a device vector of PERF_N random ints.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // generate on the host, then copy to the device
+    thrust::host_vector<int> host_data = generate_random_vector<int>(PERF_N);
+    thrust::device_vector<int> device_data = host_data;
+
+    int total = 0;
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        timer.start();
+        total = thrust::reduce(device_data.begin(), device_data.end());
+        // synchronize with the device before the timer is stopped
+        cudaDeviceSynchronize();
+        timer.stop();
+    }
+
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "sum: " << total << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_count.cu b/src/boost/libs/compute/perf/perf_thrust_count.cu
new file mode 100644
index 00000000..d69df901
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_count.cu
@@ -0,0 +1,49 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <thrust/count.h>
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks thrust::count of the value 4 in a device vector.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // random input built on the host with rand_int(), then copied to the device
+    thrust::host_vector<int> host_data(PERF_N);
+    thrust::generate(host_data.begin(), host_data.end(), rand_int);
+    thrust::device_vector<int> device_data = host_data;
+
+    size_t matches = 0;
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        timer.start();
+        matches = thrust::count(device_data.begin(), device_data.end(), 4);
+        // synchronize with the device before the timer is stopped
+        cudaDeviceSynchronize();
+        timer.stop();
+    }
+
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "count: " << matches << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu b/src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu
new file mode 100644
index 00000000..df1367a2
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_exclusive_scan.cu
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Benoit
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/scan.h>
+
+#include "perf.hpp"
+
+// Benchmarks an in-place thrust::exclusive_scan on a device vector.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // generate on the host, then copy to the device
+    thrust::host_vector<int> host_data = generate_random_vector<int>(PERF_N);
+    thrust::device_vector<int> device_data = host_data;
+
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        // the scan overwrites its input, so restore it (untimed) first
+        device_data = host_data;
+
+        timer.start();
+        thrust::exclusive_scan(
+            device_data.begin(), device_data.end(), device_data.begin()
+        );
+        cudaDeviceSynchronize();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    // copy the scanned values back into the host vector
+    thrust::copy(device_data.begin(), device_data.end(), host_data.begin());
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_find.cu b/src/boost/libs/compute/perf/perf_thrust_find.cu
new file mode 100644
index 00000000..e1482604
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_find.cu
@@ -0,0 +1,65 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <thrust/find.h>
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+
+#include "perf.hpp"
+
+// Upper bound of the values rand_int() can return.
+int rand_int_max = 25;
+
+// Returns a pseudo-random integer in [0, rand_int_max] derived from rand().
+int rand_int() {
+    return static_cast<int>(rand_int_max * (rand() / double(RAND_MAX)));
+}
+
+// Benchmarks thrust::find for a value that is absent from the device vector.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of random numbers on the host
+    thrust::host_vector<int> host_vector(PERF_N);
+    thrust::generate(host_vector.begin(), host_vector.end(), rand_int);
+    thrust::device_vector<int> v = host_vector;
+
+    // trying to find element that isn't in vector (worst-case scenario)
+    int wanted = rand_int_max + 1;
+
+    // result
+    thrust::device_vector<int>::iterator device_result_it;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        device_result_it = thrust::find(v.begin(), v.end(), wanted);
+        cudaDeviceSynchronize();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // verify: the search must come back empty-handed. The failure goes to
+    // std::cerr so it is not mixed into the size/time report on stdout.
+    if(device_result_it != v.end()){
+        std::cerr << "ERROR: "
+                  << "device_result_iterator != "
+                  << "v.end()"
+                  << std::endl;
+        return -1;
+    }
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_inner_product.cu b/src/boost/libs/compute/perf/perf_thrust_inner_product.cu
new file mode 100644
index 00000000..6d01fc53
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_inner_product.cu
@@ -0,0 +1,49 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <iterator>
+#include <algorithm>
+
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/inner_product.h>
+
+#include "perf.hpp"
+
+// Benchmarks thrust::inner_product of two device vectors of rand() values.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // both operands are filled on the host straight from rand()
+    thrust::host_vector<int> lhs_host(PERF_N);
+    thrust::host_vector<int> rhs_host(PERF_N);
+    std::generate(lhs_host.begin(), lhs_host.end(), rand);
+    std::generate(rhs_host.begin(), rhs_host.end(), rand);
+
+    // device copies of the two operands
+    thrust::device_vector<int> lhs = lhs_host;
+    thrust::device_vector<int> rhs = rhs_host;
+
+    int result = 0;
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        timer.start();
+        result = thrust::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0);
+        // synchronize with the device before the timer is stopped
+        cudaDeviceSynchronize();
+        timer.stop();
+    }
+
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    std::cout << "product: " << result << std::endl;
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_merge.cu b/src/boost/libs/compute/perf/perf_thrust_merge.cu
new file mode 100644
index 00000000..f269c939
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_merge.cu
@@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <iterator>
+#include <algorithm>
+
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/merge.h>
+#include <thrust/sort.h>
+
+#include "perf.hpp"
+
+// Benchmarks thrust::merge on the device and verifies it against std::merge.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+    // integer halves: std::floor/std::ceil gave double sizes without <cmath>
+    thrust::host_vector<int> v1(PERF_N / 2);
+    thrust::host_vector<int> v2(PERF_N - PERF_N / 2);
+    std::generate(v1.begin(), v1.end(), rand);
+    std::generate(v2.begin(), v2.end(), rand);
+    std::sort(v1.begin(), v1.end());
+    std::sort(v2.begin(), v2.end());
+
+    // transfer data to the device
+    thrust::device_vector<int> gpu_v1 = v1;
+    thrust::device_vector<int> gpu_v2 = v2;
+    thrust::device_vector<int> gpu_v3(PERF_N);
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        thrust::merge(
+            gpu_v1.begin(), gpu_v1.end(),
+            gpu_v2.begin(), gpu_v2.end(),
+            gpu_v3.begin()
+        );
+        cudaDeviceSynchronize();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // compare the device result with a host-side std::merge of the same inputs
+    thrust::host_vector<int> check_v3 = gpu_v3;
+    thrust::host_vector<int> v3(PERF_N);
+    std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
+    bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin());
+    if(!ok){
+        std::cerr << "ERROR: merged ranges different" << std::endl;
+        return -1;
+    }
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_partial_sum.cu b/src/boost/libs/compute/perf/perf_thrust_partial_sum.cu
new file mode 100644
index 00000000..e30e80b5
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_partial_sum.cu
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/scan.h>
+
+#include "perf.hpp"
+
+// Benchmarks an in-place thrust::inclusive_scan on a device vector.
+int main(int argc, char *argv[]) {
+    perf_parse_args(argc, argv);
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // generate on the host, then copy to the device
+    thrust::host_vector<int> host_data = generate_random_vector<int>(PERF_N);
+    thrust::device_vector<int> device_data = host_data;
+
+    perf_timer timer;
+    for(size_t i = 0; i != PERF_TRIALS; ++i){
+        // the scan overwrites its input, so restore it (untimed) first
+        device_data = host_data;
+
+        timer.start();
+        thrust::inclusive_scan(
+            device_data.begin(), device_data.end(), device_data.begin()
+        );
+        cudaDeviceSynchronize();
+        timer.stop();
+    }
+    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
+    // copy the scanned values back into the host vector
+    thrust::copy(device_data.begin(), device_data.end(), host_data.begin());
+    return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_partition.cu b/src/boost/libs/compute/perf/perf_thrust_partition.cu
new file mode 100644
index 00000000..5c89014c
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_partition.cu
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/partition.h>
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random int in [0, 25]; 25 only when rand() == RAND_MAX
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0, 1], then to [0, 25], truncating
+}
+
+struct less_than_ten : public thrust::unary_function<int, bool> // template order is <Argument, Result>
+{
+ __device__ bool operator()(int x) const // device-only predicate: true for values below 10
+ {
+ return x < 10;
+ }
+};
+
+int main(int argc, char *argv[]) // benchmark: thrust::partition with the less_than_ten predicate
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> h_vec(PERF_N);
+ std::generate(h_vec.begin(), h_vec.end(), rand_int);
+
+ thrust::device_vector<int> d_vec(PERF_N);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ d_vec = h_vec; // reload the unpartitioned input; done before t.start() so it is not timed
+
+ t.start();
+ thrust::partition(
+ d_vec.begin(), d_vec.end(), less_than_ten()
+ );
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu b/src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu
new file mode 100644
index 00000000..a445c137
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_reduce_by_key.cu
@@ -0,0 +1,92 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/reduce.h>
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random int in [0, 25]; 25 only when rand() == RAND_MAX
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0, 1], then to [0, 25], truncating
+}
+
+struct unique_key { // stateful generator producing a non-decreasing sequence of keys
+ int current; // key returned by the most recent call
+ int avgValuesNoPerKey; // 1 / avgValuesNoPerKey is the chance of moving to the next key
+
+ unique_key()
+ {
+ current = 0;
+ avgValuesNoPerKey = 512;
+ }
+
+ int operator()() // returns the current key, incrementing it first with probability p
+ {
+ double p = double(1.0) / static_cast<double>(avgValuesNoPerKey);
+ if((rand() / double(RAND_MAX)) <= p)
+ return ++current;
+ return current;
+ }
+} UniqueKey; // global instance passed (by value) to std::generate in main()
+
+int main(int argc, char *argv[]) // benchmark: thrust::reduce_by_key over PERF_N (key, value) pairs
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // create vector of keys and random values
+ thrust::host_vector<int> host_keys(PERF_N);
+ thrust::host_vector<int> host_values(PERF_N);
+ std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+ std::generate(host_values.begin(), host_values.end(), rand_int);
+
+ // transfer data to the device
+ thrust::device_vector<int> device_keys = host_keys;
+ thrust::device_vector<int> device_values = host_values;
+
+ // create device vectors for the results
+ thrust::device_vector<int> device_keys_results(PERF_N);
+ thrust::device_vector<int> device_values_results(PERF_N);
+
+ typedef typename thrust::device_vector<int>::iterator iterType;
+ thrust::pair<iterType, iterType> result; // ends of the reduced key / value ranges
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ result = thrust::reduce_by_key(device_keys.begin(),
+ device_keys.end(),
+ device_values.begin(),
+ device_keys_results.begin(),
+ device_values_results.begin());
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ size_t result_size = thrust::distance(device_keys_results.begin(), result.first);
+ if(result_size != (PERF_N == 0 ? 0 : static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1))){ // keys rise by 1 but may start at 0 or 1
+ std::cout << "ERROR: "
+ << "wrong number of keys"
+ << std::endl;
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_reverse.cu b/src/boost/libs/compute/perf/perf_thrust_reverse.cu
new file mode 100644
index 00000000..1927ca6a
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_reverse.cu
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/reverse.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: in-place thrust::reverse over PERF_N ints
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
+
+ // device vector (empty here; filled from h_vec at the start of every trial)
+ thrust::device_vector<int> d_vec;
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ d_vec = h_vec; // host-to-device transfer; done before t.start() so it is not timed
+
+ t.start();
+ thrust::reverse(d_vec.begin(), d_vec.end());
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ // transfer data back to host
+ thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu b/src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu
new file mode 100644
index 00000000..af1a044d
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_reverse_copy.cu
@@ -0,0 +1,47 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/reverse.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: thrust::reverse_copy from d_vec into d_reversed_vec
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
+
+ // transfer data to the device
+ thrust::device_vector<int> d_vec;
+ d_vec = h_vec;
+
+ // device vector for reversed data
+ thrust::device_vector<int> d_reversed_vec(PERF_N);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){ // d_vec is never written in the loop, so it is not reloaded per trial
+ t.start();
+ thrust::reverse_copy(d_vec.begin(), d_vec.end(), d_reversed_vec.begin());
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_rotate.cu b/src/boost/libs/compute/perf/perf_thrust_rotate.cu
new file mode 100644
index 00000000..108bb99b
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_rotate.cu
@@ -0,0 +1,51 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: rotate PERF_N ints by PERF_N / 2, emulated with copies
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
+
+ // device vector (empty here; filled from h_vec at the start of every trial)
+ thrust::device_vector<int> d_vec;
+
+ size_t rotate_distance = PERF_N / 2;
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ d_vec = h_vec; // host-to-device transfer; done before t.start() so it is not timed
+
+ t.start();
+ // there is no thrust::rotate() so we implement it manually with copy()
+ thrust::device_vector<int> tmp(d_vec.begin(), d_vec.begin() + rotate_distance); // constructed inside the timed region, so its allocation is measured too
+ thrust::copy(d_vec.begin() + rotate_distance, d_vec.end(), d_vec.begin());
+ thrust::copy(tmp.begin(), tmp.end(), d_vec.begin() + rotate_distance);
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ // transfer data back to host
+ thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_saxpy.cu b/src/boost/libs/compute/perf/perf_thrust_saxpy.cu
new file mode 100644
index 00000000..aa35a191
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_saxpy.cu
@@ -0,0 +1,63 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <iterator>
+#include <algorithm>
+
+#include <thrust/device_vector.h>
+#include <thrust/functional.h>
+#include <thrust/host_vector.h>
+#include <thrust/transform.h>
+
+#include "perf.hpp"
+
+struct saxpy_functor : public thrust::binary_function<float,float,float> // binary functor computing a * x + y
+{
+ const float a; // scale factor applied to x
+
+ saxpy_functor(float _a) : a(_a) {}
+
+ __host__ __device__
+ float operator()(const float& x, const float& y) const // x: element of the first range, y: element of the second
+ {
+ return a * x + y;
+ }
+};
+
+int main(int argc, char *argv[]) // benchmark: y = 2.5 * x + y via thrust::transform
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<float> host_x(PERF_N); // float, matching saxpy_functor; int storage forced out-of-range float->int conversions
+ thrust::host_vector<float> host_y(PERF_N);
+ std::generate(host_x.begin(), host_x.end(), rand);
+ std::generate(host_y.begin(), host_y.end(), rand);
+
+ // transfer data to the device
+ thrust::device_vector<float> device_x = host_x;
+ thrust::device_vector<float> device_y = host_y;
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){ // device_y is updated in place, so it accumulates across trials
+ t.start();
+ thrust::transform(device_x.begin(), device_x.end(), device_y.begin(), device_y.begin(), saxpy_functor(2.5f));
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ // transfer data back to host
+ thrust::copy(device_x.begin(), device_x.end(), host_x.begin());
+ thrust::copy(device_y.begin(), device_y.end(), host_y.begin());
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_set_difference.cu b/src/boost/libs/compute/perf/perf_thrust_set_difference.cu
new file mode 100644
index 00000000..3465f214
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_set_difference.cu
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <iterator>
+#include <algorithm>
+
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/set_operations.h>
+#include <thrust/sort.h>
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random int in [0, 25]; 25 only when rand() == RAND_MAX
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0, 1], then to [0, 25], truncating
+}
+
+int main(int argc, char *argv[]) // benchmark: thrust::set_difference of two sorted halves of PERF_N ints
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> v1(PERF_N / 2); // floor(PERF_N / 2), in exact integer arithmetic
+ thrust::host_vector<int> v2(PERF_N - PERF_N / 2); // ceil(PERF_N / 2); v1 and v2 together hold PERF_N elements
+ std::generate(v1.begin(), v1.end(), rand_int);
+ std::generate(v2.begin(), v2.end(), rand_int);
+ std::sort(v1.begin(), v1.end()); // set_difference requires sorted input ranges
+ std::sort(v2.begin(), v2.end());
+
+ // transfer data to the device
+ thrust::device_vector<int> gpu_v1 = v1;
+ thrust::device_vector<int> gpu_v2 = v2;
+ thrust::device_vector<int> gpu_v3(PERF_N);
+
+ thrust::device_vector<int>::iterator gpu_v3_end; // end of the output written by the last trial
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ t.start();
+ gpu_v3_end = thrust::set_difference(
+ gpu_v1.begin(), gpu_v1.end(),
+ gpu_v2.begin(), gpu_v2.end(),
+ gpu_v3.begin()
+ );
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+ std::cout << "size: " << thrust::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_sort.cu b/src/boost/libs/compute/perf/perf_thrust_sort.cu
new file mode 100644
index 00000000..b2d90939
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_sort.cu
@@ -0,0 +1,48 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/sort.h>
+
+#include "perf.hpp"
+
+int main(int argc, char *argv[]) // benchmark: thrust::sort over PERF_N ints
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
+
+ // device vector (empty here; filled from h_vec at the start of every trial)
+ thrust::device_vector<int> d_vec;
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ d_vec = h_vec; // reload the unsorted input; done before t.start() so it is not timed
+
+ t.start();
+ thrust::sort(d_vec.begin(), d_vec.end());
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ // transfer data back to host
+ thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_thrust_unique.cu b/src/boost/libs/compute/perf/perf_thrust_unique.cu
new file mode 100644
index 00000000..6030f291
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_thrust_unique.cu
@@ -0,0 +1,50 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+#include <iostream>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/unique.h>
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random int in [0, 25]; 25 only when rand() == RAND_MAX
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0, 1], then to [0, 25], truncating
+}
+
+int main(int argc, char *argv[]) // benchmark: in-place thrust::unique over PERF_N ints
+{
+ perf_parse_args(argc, argv);
+
+ std::cout << "size: " << PERF_N << std::endl;
+ thrust::host_vector<int> h_vec(PERF_N);
+ std::generate(h_vec.begin(), h_vec.end(), rand_int);
+
+ thrust::device_vector<int> d_vec(PERF_N);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ d_vec = h_vec; // reload the original input; done before t.start() so it is not timed
+
+ t.start();
+ thrust::unique(d_vec.begin(), d_vec.end()); // the returned new-end iterator is not used
+ cudaDeviceSynchronize(); // wait for the device to finish before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp b/src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp
new file mode 100644
index 00000000..57973a81
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_uniform_int_distribution.cpp
@@ -0,0 +1,46 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/container/vector.hpp>
+#include <boost/compute/random/default_random_engine.hpp>
+#include <boost/compute/random/uniform_int_distribution.hpp>
+
+#include "perf.hpp"
+
+namespace compute = boost::compute;
+
+int main(int argc, char *argv[]) // benchmark: fill PERF_N uints using uniform_int_distribution(0, 1)
+{
+ perf_parse_args(argc, argv);
+ std::cout << "size: " << PERF_N << std::endl;
+
+ compute::device device = compute::system::default_device();
+ compute::context context(device);
+ compute::command_queue queue(context, device);
+
+ compute::vector<compute::uint_> vector(PERF_N, context);
+
+ compute::default_random_engine rng(queue);
+ compute::uniform_int_distribution<compute::uint_> dist(0, 1);
+
+ perf_timer t; // a single timed run: there is no PERF_TRIALS loop in this benchmark
+ t.start();
+ dist.generate(vector.begin(), vector.end(), rng, queue);
+ queue.finish(); // wait for the queued work to complete before stopping the timer
+ t.stop();
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_unique.cpp b/src/boost/libs/compute/perf/perf_unique.cpp
new file mode 100644
index 00000000..b25801f1
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_unique.cpp
@@ -0,0 +1,60 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/unique.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random int in [0, 25]; 25 only when rand() == RAND_MAX
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0, 1], then to [0, 25], truncating
+}
+
+int main(int argc, char *argv[]) // benchmark: in-place boost::compute::unique over PERF_N ints
+{
+ perf_parse_args(argc, argv);
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // setup context and queue for the default device
+ boost::compute::device device = boost::compute::system::default_device();
+ boost::compute::context context(device);
+ boost::compute::command_queue queue(context, device);
+ std::cout << "device: " << device.name() << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ // create vector on the device (the data is copied at the start of each trial)
+ boost::compute::vector<int> device_vector(PERF_N, context);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ boost::compute::copy( // reload the original input; issued before t.start()
+ host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+ );
+ t.start();
+ boost::compute::unique( // the returned new-end iterator is not used
+ device_vector.begin(), device_vector.end(), queue
+ );
+ queue.finish(); // wait for the queued work to complete before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perf_unique_copy.cpp b/src/boost/libs/compute/perf/perf_unique_copy.cpp
new file mode 100644
index 00000000..d7ff98af
--- /dev/null
+++ b/src/boost/libs/compute/perf/perf_unique_copy.cpp
@@ -0,0 +1,61 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/unique_copy.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+int rand_int() // pseudo-random int in [0, 25]; 25 only when rand() == RAND_MAX
+{
+ return static_cast<int>((rand() / double(RAND_MAX)) * 25.0); // scale rand() to [0, 1], then to [0, 25], truncating
+}
+
+int main(int argc, char *argv[]) // benchmark: boost::compute::unique_copy from device_vector into device_vector2
+{
+ perf_parse_args(argc, argv);
+ std::cout << "size: " << PERF_N << std::endl;
+
+ // setup context and queue for the default device
+ boost::compute::device device = boost::compute::system::default_device();
+ boost::compute::context context(device);
+ boost::compute::command_queue queue(context, device);
+ std::cout << "device: " << device.name() << std::endl;
+
+ // create vector of random numbers on the host
+ std::vector<int> host_vector(PERF_N);
+ std::generate(host_vector.begin(), host_vector.end(), rand_int);
+
+ // create input and output vectors on the device (the input is copied at the start of each trial)
+ boost::compute::vector<int> device_vector(PERF_N, context);
+ boost::compute::vector<int> device_vector2(PERF_N, context);
+
+ perf_timer t;
+ for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+ boost::compute::copy( // host-to-device copy of the input; issued before t.start()
+ host_vector.begin(), host_vector.end(), device_vector.begin(), queue
+ );
+ t.start();
+ boost::compute::unique_copy( // the returned output-end iterator is not used
+ device_vector.begin(), device_vector.end(), device_vector2.begin(), queue
+ );
+ queue.finish(); // wait for the queued work to complete before stopping the timer
+ t.stop();
+ }
+ std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+ return 0;
+}
diff --git a/src/boost/libs/compute/perf/perfdoc.py b/src/boost/libs/compute/perf/perfdoc.py
new file mode 100755
index 00000000..e9c60362
--- /dev/null
+++ b/src/boost/libs/compute/perf/perfdoc.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+# Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+# Distributed under the Boost Software License, Version 1.0
+# See accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt
+#
+# See http://boostorg.github.com/compute for more information.
+
+import os
+import sys
+import pylab
+
+from perf import run_benchmark
+
+fignum = 0  # number of the most recently created pylab figure
+
+def plot_to_file(report, filename):  # draw one log-log curve per run in report.samples and save to filename
+ global fignum
+ fignum += 1  # use a fresh figure number for every plot
+ pylab.figure(fignum)
+
+ run_to_label = {  # legend text for each run name; a run missing here raises KeyError below
+ "stl" : "C++ STL",
+ "thrust" : "Thrust",
+ "compute" : "Boost.Compute",
+ "bolt" : "Bolt"
+ }
+
+ for run in sorted(report.samples.keys()):
+ x = []
+ y = []
+
+ for sample in report.samples[run]:  # presumably (size, time) pairs, going by the axis labels - confirm in perf.py
+ x.append(sample[0])
+ y.append(sample[1])
+
+ pylab.loglog(x, y, marker='o', label=run_to_label[run])
+
+ pylab.xlabel("Size")
+ pylab.ylabel("Time (ms)")
+ pylab.legend(loc='upper left')
+ pylab.savefig(filename)
+
+if __name__ == '__main__':  # run every listed benchmark and write one plot per algorithm
+ sizes = [pow(2, x) for x in range(10, 26)]  # powers of two from 2**10 to 2**25
+ algorithms = [
+ "accumulate",
+ "count",
+ "inner_product",
+ "merge",
+ "partial_sum",
+ "partition",
+ "reverse",
+ "rotate",
+ "saxpy",
+ "sort",
+ "unique",
+ ]
+
+ try:
+ os.mkdir("perf_plots")
+ except OSError:  # e.g. the directory already exists; any other OSError is ignored too
+ pass
+
+ for algorithm in algorithms:
+ print("running '%s'" % (algorithm))
+ report = run_benchmark(algorithm, sizes, ["stl", "thrust", "bolt"])  # presumably the list names the backends to compare against - confirm in perf.py
+ plot_to_file(report, "perf_plots/%s_time_plot.png" % algorithm)
+
+