diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/boost/libs/compute/example | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/boost/libs/compute/example')
38 files changed, 5022 insertions, 0 deletions
diff --git a/src/boost/libs/compute/example/CMakeLists.txt b/src/boost/libs/compute/example/CMakeLists.txt new file mode 100644 index 00000000..a5708cb0 --- /dev/null +++ b/src/boost/libs/compute/example/CMakeLists.txt @@ -0,0 +1,166 @@ +# --------------------------------------------------------------------------- +# Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +# +# Distributed under the Boost Software License, Version 1.0 +# See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt +# +# --------------------------------------------------------------------------- + +include_directories(../include) + +set(EXAMPLES + amd_cpp_kernel + black_scholes + copy_data + fizz_buzz + hello_world + host_sort + inline_ptx + longest_vector + list_devices + mapped_view + memory_limits + monte_carlo + point_centroid + price_cross + print_vector + sort_vector + simple_kernel + time_copy + transform_sqrt + vector_addition + simple_moving_average + matrix_transpose +) + +# boost library link dependencies +set(EXAMPLE_BOOST_COMPONENTS program_options) + +if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) + set(EXAMPLE_BOOST_COMPONENTS ${EXAMPLE_BOOST_COMPONENTS} system filesystem) +endif() + +if(${BOOST_COMPUTE_THREAD_SAFE} AND NOT ${BOOST_COMPUTE_USE_CPP11}) + set(EXAMPLE_BOOST_COMPONENTS ${EXAMPLE_BOOST_COMPONENTS} system thread) +endif() + +if(MSVC AND EXAMPLE_BOOST_COMPONENTS) + set(EXAMPLE_BOOST_COMPONENTS ${EXAMPLE_BOOST_COMPONENTS} chrono) +endif() + +if(EXAMPLE_BOOST_COMPONENTS) + list(REMOVE_DUPLICATES EXAMPLE_BOOST_COMPONENTS) +endif() +find_package(Boost 1.54 REQUIRED COMPONENTS ${EXAMPLE_BOOST_COMPONENTS}) +include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) + +foreach(EXAMPLE ${EXAMPLES}) + add_executable(${EXAMPLE} ${EXAMPLE}.cpp) + target_link_libraries(${EXAMPLE} ${OpenCL_LIBRARIES} ${Boost_LIBRARIES}) + + # add example program to list of tests (if testing is enabled) + if(${BOOST_COMPUTE_BUILD_TESTS}) + add_test("example.${EXAMPLE}" ${EXAMPLE}) + endif() +endforeach() + +# opencl test example +add_executable(opencl_test opencl_test.cpp) +target_link_libraries(opencl_test ${OpenCL_LIBRARIES}) + +# eigen examples +if(${BOOST_COMPUTE_HAVE_EIGEN}) + find_package(Eigen REQUIRED) + include_directories(SYSTEM ${EIGEN_INCLUDE_DIRS}) + add_executable(batched_determinant batched_determinant.cpp) + target_link_libraries(batched_determinant ${OpenCL_LIBRARIES} ${Boost_LIBRARIES}) +endif() + +# opencv examples +if(${BOOST_COMPUTE_HAVE_OPENCV}) + find_package(OpenCV REQUIRED) + include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) + + set(OPENCV_EXAMPLES + k_means + opencv_flip + random_walk + opencv_optical_flow + opencv_convolution + opencv_sobel_filter + opencv_histogram + ) + + foreach(EXAMPLE ${OPENCV_EXAMPLES}) + add_executable(${EXAMPLE} ${EXAMPLE}.cpp) + target_link_libraries(${EXAMPLE} ${OpenCL_LIBRARIES} ${Boost_LIBRARIES} ${OpenCV_LIBS}) + endforeach() +endif() + +# opengl/vtk examples +if(${BOOST_COMPUTE_HAVE_VTK}) + find_package(VTK REQUIRED) + include(${VTK_USE_FILE}) + add_executable(opengl_sphere opengl_sphere.cpp) + target_link_libraries(opengl_sphere ${OpenCL_LIBRARIES} ${Boost_LIBRARIES} ${VTK_LIBRARIES}) + if(APPLE) + target_link_libraries(opengl_sphere "-framework OpenGL") + elseif(UNIX) + target_link_libraries(opengl_sphere GL) + endif() +endif() + +# qt examples +if(${BOOST_COMPUTE_HAVE_QT}) + + # look for Qt4 in the first place + find_package(Qt4 QUIET) + + if(${QT4_FOUND}) + # build with Qt4 + find_package(Qt4 REQUIRED COMPONENTS QtCore QtGui QtOpenGL) + set(QT_USE_QTOPENGL TRUE) + include(${QT_USE_FILE}) + else() + + # look for Qt5 + find_package(Qt5Widgets QUIET) + + if(${Qt5Widgets_FOUND}) + # build with Qt5 + find_package(Qt5Core REQUIRED) + find_package(Qt5Widgets REQUIRED) + find_package(Qt5OpenGL REQUIRED) + include_directories(${Qt5OpenGL_INCLUDE_DIRS}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Qt5OpenGL_EXECUTABLE_COMPILE_FLAGS}") + set(QT_LIBRARIES ${Qt5OpenGL_LIBRARIES}) + else() + # no valid Qt framework found + message(FATAL_ERROR "Error: Did not find Qt4 or Qt5") + endif() + endif() + + # required by both versions + set(CMAKE_AUTOMOC TRUE) + include_directories(${CMAKE_CURRENT_BINARY_DIR}) + + # add examples + add_executable(qimage_blur qimage_blur.cpp) + target_link_libraries(qimage_blur ${OpenCL_LIBRARIES} ${Boost_LIBRARIES} ${QT_LIBRARIES}) + + set(QT_OPENGL_EXAMPLES + mandelbrot + nbody + resize_image + ) + foreach(EXAMPLE ${QT_OPENGL_EXAMPLES}) + add_executable(${EXAMPLE} ${EXAMPLE}.cpp) + target_link_libraries(${EXAMPLE} ${OpenCL_LIBRARIES} ${Boost_LIBRARIES} ${QT_LIBRARIES}) + if(APPLE) + target_link_libraries(${EXAMPLE} "-framework OpenGL") + elseif(UNIX) + target_link_libraries(${EXAMPLE} GL) + endif() + endforeach() +endif() diff --git a/src/boost/libs/compute/example/amd_cpp_kernel.cpp b/src/boost/libs/compute/example/amd_cpp_kernel.cpp new file mode 100644 index 00000000..0207312f --- /dev/null +++ b/src/boost/libs/compute/example/amd_cpp_kernel.cpp @@ -0,0 +1,116 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +// this example shows how to use the static c++ kernel language +// extension (currently only supported by AMD) to compile and +// execute a templated c++ kernel. +// Using platform vendor info to decide if this is AMD platform +int main() +{ + // get default device and setup context + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + // check the platform vendor string + if(device.platform().vendor() != "Advanced Micro Devices, Inc."){ + std::cerr << "error: static C++ kernel language is only " + << "supported on AMD devices." + << std::endl; + return 0; + } + + // create input int values and copy them to the device + int int_data[] = { 1, 2, 3, 4}; + compute::vector<int> int_vector(int_data, int_data + 4, queue); + + // create input float values and copy them to the device + float float_data[] = { 2.0f, 4.0f, 6.0f, 8.0f }; + compute::vector<float> float_vector(float_data, float_data + 4, queue); + + // create kernel source with a templated function and templated kernel + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + // define our templated function which returns the square of its input + template<typename T> + inline T square(const T x) + { + return x * x; + } + + // define our templated kernel which calls square on each value in data + template<typename T> + __kernel void square_kernel(__global T *data) + { + const uint i = get_global_id(0); + data[i] = square(data[i]); + } + + // explicitly instantiate the square kernel for int's. this allows + // for it to be called from the host with the given mangled name. + template __attribute__((mangled_name(square_kernel_int))) + __kernel void square_kernel(__global int *data); + + // also instantiate the square kernel for float's. + template __attribute__((mangled_name(square_kernel_float))) + __kernel void square_kernel(__global float *data); + ); + + // build the program. must enable the c++ static kernel language + // by passing the "-x clc++" compile option. + compute::program square_program = + compute::program::build_with_source(source, context, "-x clc++"); + + // create the square kernel for int's by using its mangled name declared + // in the explicit template instantiation. + compute::kernel square_int_kernel(square_program, "square_kernel_int"); + square_int_kernel.set_arg(0, int_vector); + + // execute the square int kernel + queue.enqueue_1d_range_kernel(square_int_kernel, 0, int_vector.size(), 4); + + // print out the squared int values + std::cout << "int's: "; + compute::copy( + int_vector.begin(), int_vector.end(), + std::ostream_iterator<int>(std::cout, " "), + queue + ); + std::cout << std::endl; + + // now create the square kernel for float's + compute::kernel square_float_kernel(square_program, "square_kernel_float"); + square_float_kernel.set_arg(0, float_vector); + + // execute the square int kernel + queue.enqueue_1d_range_kernel(square_float_kernel, 0, float_vector.size(), 4); + + // print out the squared float values + std::cout << "float's: "; + compute::copy( + float_vector.begin(), float_vector.end(), + std::ostream_iterator<float>(std::cout, " "), + queue + ); + std::cout << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/batched_determinant.cpp b/src/boost/libs/compute/example/batched_determinant.cpp new file mode 100644 index 00000000..0029151e --- /dev/null +++ b/src/boost/libs/compute/example/batched_determinant.cpp @@ -0,0 +1,96 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <Eigen/Core> +#include <Eigen/LU> + +#include <boost/compute/function.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace compute = boost::compute; + +// this example shows how to compute the determinant of many 4x4 matrices +// using a determinant function and the transform() algorithm. in OpenCL the +// float16 type can be used to store a 4x4 matrix and the components are laid +// out in the following order: +// +// M = [ s0 s4 s8 sc ] +// [ s1 s5 s9 sd ] +// [ s2 s6 sa se ] +// [ s3 s7 sb sf ] +// +// the input matrices are created using eigen's random matrix and then +// used again at the end to verify the results of the determinant function. +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + size_t n = 1000; + + // create random 4x4 matrices on the host + std::vector<Eigen::Matrix4f> matrices(n); + for(size_t i = 0; i < n; i++){ + matrices[i] = Eigen::Matrix4f::Random(); + } + + // copy matrices to the device + using compute::float16_; + compute::vector<float16_> input(n, context); + compute::copy( + matrices.begin(), matrices.end(), input.begin(), queue + ); + + // function returning the determinant of a 4x4 matrix. + BOOST_COMPUTE_FUNCTION(float, determinant4x4, (const float16_ m), + { + return m.s0*m.s5*m.sa*m.sf + m.s0*m.s6*m.sb*m.sd + m.s0*m.s7*m.s9*m.se + + m.s1*m.s4*m.sb*m.se + m.s1*m.s6*m.s8*m.sf + m.s1*m.s7*m.sa*m.sc + + m.s2*m.s4*m.s9*m.sf + m.s2*m.s5*m.sb*m.sc + m.s2*m.s7*m.s8*m.sd + + m.s3*m.s4*m.sa*m.sd + m.s3*m.s5*m.s8*m.se + m.s3*m.s6*m.s9*m.sc - + m.s0*m.s5*m.sb*m.se - m.s0*m.s6*m.s9*m.sf - m.s0*m.s7*m.sa*m.sd - + m.s1*m.s4*m.sa*m.sf - m.s1*m.s6*m.sb*m.sc - m.s1*m.s7*m.s8*m.se - + m.s2*m.s4*m.sb*m.sd - m.s2*m.s5*m.s8*m.sf - m.s2*m.s7*m.s9*m.sc - + m.s3*m.s4*m.s9*m.se - m.s3*m.s5*m.sa*m.sc - m.s3*m.s6*m.s8*m.sd; + }); + + // calculate determinants on the gpu + compute::vector<float> determinants(n, context); + compute::transform( + input.begin(), input.end(), determinants.begin(), determinant4x4, queue + ); + + // check determinants + std::vector<float> host_determinants(n); + compute::copy( + determinants.begin(), determinants.end(), host_determinants.begin(), queue + ); + + for(size_t i = 0; i < n; i++){ + float det = matrices[i].determinant(); + + if(std::abs(det - host_determinants[i]) > 1e-6){ + std::cerr << "error: wrong determinant at " << i << " (" + << host_determinants[i] << " != " << det << ")" + << std::endl; + return -1; + } + } + + return 0; +} diff --git a/src/boost/libs/compute/example/black_scholes.cpp b/src/boost/libs/compute/example/black_scholes.cpp new file mode 100644 index 00000000..13a0e097 --- /dev/null +++ b/src/boost/libs/compute/example/black_scholes.cpp @@ -0,0 +1,168 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <cstdlib> +#include <iostream> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +// return a random float between lo and hi +float rand_float(float lo, float hi) +{ + float x = (float) std::rand() / (float) RAND_MAX; + + return (1.0f - x) * lo + x * hi; +} + +// this example demostrates a black-scholes option pricing kernel. +int main() +{ + // number of options + const int N = 4000000; + + // black-scholes parameters + const float risk_free_rate = 0.02f; + const float volatility = 0.30f; + + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + // initialize option data on host + std::vector<float> stock_price_data(N); + std::vector<float> option_strike_data(N); + std::vector<float> option_years_data(N); + + std::srand(5347); + for(int i = 0; i < N; i++){ + stock_price_data[i] = rand_float(5.0f, 30.0f); + option_strike_data[i] = rand_float(1.0f, 100.0f); + option_years_data[i] = rand_float(0.25f, 10.0f); + } + + // create memory buffers on the device + compute::vector<float> call_result(N, context); + compute::vector<float> put_result(N, context); + compute::vector<float> stock_price(N, context); + compute::vector<float> option_strike(N, context); + compute::vector<float> option_years(N, context); + + // copy initial values to the device + compute::copy_n(stock_price_data.begin(), N, stock_price.begin(), queue); + compute::copy_n(option_strike_data.begin(), N, option_strike.begin(), queue); + compute::copy_n(option_years_data.begin(), N, option_years.begin(), queue); + + // source code for black-scholes program + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + // approximation of the cumulative normal distribution function + static float cnd(float d) + { + const float A1 = 0.319381530f; + const float A2 = -0.356563782f; + const float A3 = 1.781477937f; + const float A4 = -1.821255978f; + const float A5 = 1.330274429f; + const float RSQRT2PI = 0.39894228040143267793994605993438f; + + float K = 1.0f / (1.0f + 0.2316419f * fabs(d)); + float cnd = + RSQRT2PI * exp(-0.5f * d * d) * + (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))); + + if(d > 0){ + cnd = 1.0f - cnd; + } + + return cnd; + } + + // black-scholes option pricing kernel + __kernel void black_scholes(__global float *call_result, + __global float *put_result, + __global const float *stock_price, + __global const float *option_strike, + __global const float *option_years, + float risk_free_rate, + float volatility) + { + const uint opt = get_global_id(0); + + float S = stock_price[opt]; + float X = option_strike[opt]; + float T = option_years[opt]; + float R = risk_free_rate; + float V = volatility; + + float sqrtT = sqrt(T); + float d1 = (log(S / X) + (R + 0.5f * V * V) * T) / (V * sqrtT); + float d2 = d1 - V * sqrtT; + float CNDD1 = cnd(d1); + float CNDD2 = cnd(d2); + + float expRT = exp(-R * T); + call_result[opt] = S * CNDD1 - X * expRT * CNDD2; + put_result[opt] = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1); + } + ); + + // build black-scholes program + compute::program program = compute::program::create_with_source(source, context); + program.build(); + + // setup black-scholes kernel + compute::kernel kernel(program, "black_scholes"); + kernel.set_arg(0, call_result); + kernel.set_arg(1, put_result); + kernel.set_arg(2, stock_price); + kernel.set_arg(3, option_strike); + kernel.set_arg(4, option_years); + kernel.set_arg(5, risk_free_rate); + kernel.set_arg(6, volatility); + + // execute black-scholes kernel + queue.enqueue_1d_range_kernel(kernel, 0, N, 0); + + // print out the first option's put and call prices + float call0, put0; + compute::copy_n(put_result.begin(), 1, &put0, queue); + compute::copy_n(call_result.begin(), 1, &call0, queue); + + std::cout << "option 0 call price: " << call0 << std::endl; + std::cout << "option 0 put price: " << put0 << std::endl; + + // due to the differences in the random-number generators between Operating Systems + // and/or compilers, we will get different "expected" results for this example +#ifdef __APPLE__ + double expected_call0 = 0.000249461; + double expected_put0 = 26.2798; +#elif _MSC_VER + double expected_call0 = 8.21412; + double expected_put0 = 2.25904; +#else + double expected_call0 = 0.0999f; + double expected_put0 = 43.0524f; +#endif + + // check option prices + if(std::abs(call0 - expected_call0) > 1e-4 || std::abs(put0 - expected_put0) > 1e-4){ + std::cerr << "error: option prices are wrong" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/boost/libs/compute/example/copy_data.cpp b/src/boost/libs/compute/example/copy_data.cpp new file mode 100644 index 00000000..0550287f --- /dev/null +++ b/src/boost/libs/compute/example/copy_data.cpp @@ -0,0 +1,49 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//[copy_data_example + +#include <vector> + +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> + +namespace compute = boost::compute; + +int main() +{ + // get default device and setup context + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + // create data array on host + int host_data[] = { 1, 3, 5, 7, 9 }; + + // create vector on device + compute::vector<int> device_vector(5, context); + + // copy from host to device + compute::copy( + host_data, host_data + 5, device_vector.begin(), queue + ); + + // create vector on host + std::vector<int> host_vector(5); + + // copy data back to host + compute::copy( + device_vector.begin(), device_vector.end(), host_vector.begin(), queue + ); + + return 0; +} + +//] diff --git a/src/boost/libs/compute/example/fizz_buzz.cpp b/src/boost/libs/compute/example/fizz_buzz.cpp new file mode 100644 index 00000000..2c69995f --- /dev/null +++ b/src/boost/libs/compute/example/fizz_buzz.cpp @@ -0,0 +1,160 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/accumulate.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/utility/dim.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +const char fizz_buzz_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + // returns the length of the string for the number 'n'. This is used + // during the first pass when we calculate the amount of space needed + // for each string in the fizz-buzz sequence. + inline uint fizz_buzz_string_length(uint n) + { + if((n % 5 == 0) && (n % 3 == 0)){ + return sizeof("fizzbuzz"); + } + else if(n % 5 == 0){ + return sizeof("fizz"); + } + else if(n % 3 == 0){ + return sizeof("buzz"); + } + else { + uint digits = 0; + while(n){ + n /= 10; + digits++; + } + return digits + 1; + } + } + + // first-pass kernel which calculates the string length for each number + // and writes it to the string_lengths array. these will then be passed + // to exclusive_scan() to calculate the output offsets for each string. + __kernel void fizz_buzz_allocate_strings(__global uint *string_lengths) + { + const uint i = get_global_id(0); + const uint n = i + 1; + + string_lengths[i] = fizz_buzz_string_length(n); + } + + // copy the string 's' with length 'n' to 'result' (just like strncpy()) + inline void copy_string(__constant const char *s, uint n, __global char *result) + { + while(n--){ + result[n] = s[n]; + } + } + + // reverse the string [start, end). + inline void reverse_string(__global char *start, __global char *end) + { + while(start < end){ + char tmp = *end; + *end = *start; + *start = tmp; + start++; + end--; + } + } + + // second-pass kernel which copies the fizz-buzz string for each number to + // buffer using the previously calculated offsets. + __kernel void fizz_buzz_copy_strings(__global const uint *offsets, __global char *buffer) + { + const uint i = get_global_id(0); + const uint n = i + 1; + const uint offset = offsets[i]; + + if((n % 5 == 0) && (n % 3 == 0)){ + copy_string("fizzbuzz\n", 9, buffer + offset); + } + else if(n % 5 == 0){ + copy_string("fizz\n", 5, buffer + offset); + } + else if(n % 3 == 0){ + copy_string("buzz\n", 5, buffer + offset); + } + else { + // convert number to string and write it to the output + __global char *number = buffer + offset; + uint n_ = n; + while(n_){ + *number++ = (n_%10) + '0'; + n_ /= 10; + } + reverse_string(buffer + offset, number - 1); + *number = '\n'; + } + } +); + +int main() +{ + using compute::dim; + using compute::uint_; + + // fizz-buzz up to 100 + size_t n = 100; + + // get the default device + compute::device device = compute::system::default_device(); + compute::context ctx(device); + compute::command_queue queue(ctx, device); + + // compile the fizz-buzz program + compute::program fizz_buzz_program = + compute::program::create_with_source(fizz_buzz_source, ctx); + fizz_buzz_program.build(); + + // create a vector for the output string and computing offsets + compute::vector<char> output(ctx); + compute::vector<uint_> offsets(n, ctx); + + // run the allocate kernel to calculate string lengths + compute::kernel allocate_kernel(fizz_buzz_program, "fizz_buzz_allocate_strings"); + allocate_kernel.set_arg(0, offsets); + queue.enqueue_nd_range_kernel(allocate_kernel, dim(0), dim(n), dim(1)); + + // allocate space for the output string + output.resize( + compute::accumulate(offsets.begin(), offsets.end(), 0, queue) + ); + + // scan string lengths for each number to calculate the output offsets + compute::exclusive_scan( + offsets.begin(), offsets.end(), offsets.begin(), queue + ); + + // run the copy kernel to fill the output buffer + compute::kernel copy_kernel(fizz_buzz_program, "fizz_buzz_copy_strings"); + copy_kernel.set_arg(0, offsets); + copy_kernel.set_arg(1, output); + queue.enqueue_nd_range_kernel(copy_kernel, dim(0), dim(n), dim(1)); + + // copy the string to the host and print it to stdout + std::string str; + str.resize(output.size()); + compute::copy(output.begin(), output.end(), str.begin(), queue); + std::cout << str; + + return 0; +} diff --git a/src/boost/libs/compute/example/hello_world.cpp b/src/boost/libs/compute/example/hello_world.cpp new file mode 100644 index 00000000..ef45fdd6 --- /dev/null +++ b/src/boost/libs/compute/example/hello_world.cpp @@ -0,0 +1,30 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//[hello_world_example + +#include <iostream> + +#include <boost/compute/core.hpp> + +namespace compute = boost::compute; + +int main() +{ + // get the default device + compute::device device = compute::system::default_device(); + + // print the device's name and platform + std::cout << "hello from " << device.name(); + std::cout << " (platform: " << device.platform().name() << ")" << std::endl; + + return 0; +} +//] diff --git a/src/boost/libs/compute/example/host_sort.cpp b/src/boost/libs/compute/example/host_sort.cpp new file mode 100644 index 00000000..b5ff52cf --- /dev/null +++ b/src/boost/libs/compute/example/host_sort.cpp @@ -0,0 +1,56 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <vector> + +#include <boost/spirit/include/karma.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/sort.hpp> + +namespace compute = boost::compute; +namespace karma = boost::spirit::karma; + +int rand_int() +{ + return rand() % 100; +} + +// this example demonstrates how to sort a std::vector of ints on the GPU +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + // create vector of random values on the host + std::vector<int> vector(8); + std::generate(vector.begin(), vector.end(), rand_int); + + // print input vector + std::cout << "input: [ " + << karma::format(karma::int_ % ", ", vector) + << " ]" + << std::endl; + + // sort vector + compute::sort(vector.begin(), vector.end(), queue); + + // print sorted vector + std::cout << "output: [ " + << karma::format(karma::int_ % ", ", vector) + << " ]" + << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/inline_ptx.cpp b/src/boost/libs/compute/example/inline_ptx.cpp new file mode 100644 index 00000000..fe4e32f0 --- /dev/null +++ b/src/boost/libs/compute/example/inline_ptx.cpp @@ -0,0 +1,72 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <algorithm> +#include <iostream> +#include <vector> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> + +namespace compute = boost::compute; + +// this example shows how to embed PTX assembly instructions +// directly into boost.compute functions and use them with the +// transform() algorithm. +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + // check to ensure we have an nvidia device + if(gpu.vendor() != "NVIDIA Corporation"){ + std::cerr << "error: inline PTX assembly is only supported " + << "on NVIDIA devices." + << std::endl; + return 0; + } + + // create input values and copy them to the device + using compute::uint_; + uint_ data[] = { 0x00, 0x01, 0x11, 0xFF }; + compute::vector<uint_> input(data, data + 4, queue); + + // function returning the number of bits set (aka population count or + // popcount) using the "popc" inline ptx assembly instruction. + BOOST_COMPUTE_FUNCTION(uint_, nvidia_popc, (uint_ x), + { + uint count; + asm("popc.b32 %0, %1;" : "=r"(count) : "r"(x)); + return count; + }); + + // calculate the popcount for each input value + compute::vector<uint_> output(input.size(), context); + compute::transform( + input.begin(), input.end(), output.begin(), nvidia_popc, queue + ); + + // copy results back to the host and print them out + std::vector<uint_> counts(output.size()); + compute::copy(output.begin(), output.end(), counts.begin(), queue); + + for(size_t i = 0; i < counts.size(); i++){ + std::cout << "0x" << std::hex << data[i] + << " has " << counts[i] + << " bits set" << std::endl; + } + + return 0; +} diff --git a/src/boost/libs/compute/example/k_means.cpp b/src/boost/libs/compute/example/k_means.cpp new file mode 100644 index 00000000..cd291a9b --- /dev/null +++ b/src/boost/libs/compute/example/k_means.cpp @@ -0,0 +1,229 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/compute/random/default_random_engine.hpp> +#include <boost/compute/random/uniform_real_distribution.hpp> +#include <boost/compute/utility/dim.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +using compute::dim; +using compute::int_; +using compute::float_; +using compute::float2_; + +// the k-means example implements the k-means clustering algorithm +int main() +{ + // number of clusters + size_t k = 6; + + // number of points + size_t n_points = 4500; + + // height and width of image + size_t height = 800; + size_t width = 800; + + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + // generate random, uniformily-distributed points + compute::default_random_engine random_engine(queue); + compute::uniform_real_distribution<float_> uniform_distribution(0, 800); + + compute::vector<float2_> points(n_points, context); + uniform_distribution.generate( + compute::make_buffer_iterator<float_>(points.get_buffer(), 0), + compute::make_buffer_iterator<float_>(points.get_buffer(), n_points * 2), + random_engine, + queue + ); + + // initialize all points to cluster 0 + compute::vector<int_> clusters(n_points, context); + compute::fill(clusters.begin(), clusters.end(), 0, queue); + + // create initial means with the first k points + compute::vector<float2_> means(k, context); + compute::copy_n(points.begin(), k, means.begin(), queue); + + // k-means clustering program source + const char k_means_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void assign_clusters(__global const float2 *points, + __global const float2 *means, + const int k, + __global int *clusters) + { + const uint gid = get_global_id(0); + + const float2 point = points[gid]; + + // find the closest cluster + float current_distance = 0; + int closest_cluster = -1; + + // find closest cluster mean to the point + for(int i = 0; i < k; i++){ + const float2 mean = means[i]; + + int distance_to_mean = distance(point, mean); + if(closest_cluster == -1 || distance_to_mean < current_distance){ + current_distance = distance_to_mean; + closest_cluster = i; + } + } + + // write new cluster + clusters[gid] = closest_cluster; + } + + __kernel void update_means(__global const float2 *points, + const uint n_points, + __global float2 *means, + __global const int *clusters) + { + const uint k = get_global_id(0); + + float2 sum = { 0, 0 }; + float count = 0; + for(uint i = 0; i < n_points; i++){ + if(clusters[i] == k){ + sum += points[i]; + count += 1; + } + } + + means[k] = sum / count; + } + ); + + // build the k-means program + compute::program k_means_program = + compute::program::build_with_source(k_means_source, context); + + // setup the k-means kernels + compute::kernel assign_clusters_kernel(k_means_program, "assign_clusters"); + assign_clusters_kernel.set_arg(0, points); + assign_clusters_kernel.set_arg(1, means); + assign_clusters_kernel.set_arg(2, int_(k)); + assign_clusters_kernel.set_arg(3, clusters); + + compute::kernel update_means_kernel(k_means_program, "update_means"); + update_means_kernel.set_arg(0, points); + update_means_kernel.set_arg(1, int_(n_points)); + update_means_kernel.set_arg(2, means); + update_means_kernel.set_arg(3, clusters); + + // run the k-means algorithm + for(int iteration = 0; iteration < 25; iteration++){ + queue.enqueue_1d_range_kernel(assign_clusters_kernel, 0, n_points, 0); + queue.enqueue_1d_range_kernel(update_means_kernel, 0, k, 0); + } + + // create output image + compute::image2d image( + context, width, height, compute::image_format(CL_RGBA, CL_UNSIGNED_INT8) + ); + + // program with two kernels, one to fill the image with white, and then + // one the draw to points calculated in coordinates on the image + const char draw_walk_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void draw_points(__global const float2 *points, + __global const int *clusters, + __write_only image2d_t image) + { + const uint i = get_global_id(0); + const float2 coord = points[i]; + + // map cluster number to color + uint4 color = { 0, 0, 0, 0 }; + switch(clusters[i]){ + case 0: + color = (uint4)(255, 0, 0, 255); + break; + case 1: + color = (uint4)(0, 255, 0, 255); + break; + case 2: + color = (uint4)(0, 0, 255, 255); + break; + case 3: + color = (uint4)(255, 255, 0, 255); + break; + case 4: + color = (uint4)(255, 0, 255, 255); + break; + case 5: + color = (uint4)(0, 255, 255, 255); + break; + } + + // draw a 3x3 pixel point + for(int x = -1; x <= 1; x++){ + for(int y = -1; y <= 1; y++){ + if(coord.x + x > 0 && coord.x + x < get_image_width(image) && + coord.y + y > 0 && coord.y + y < get_image_height(image)){ + write_imageui(image, (int2)(coord.x, coord.y) + (int2)(x, y), color); + } + } + } + } + + __kernel void fill_gray(__write_only image2d_t image) + { + const int2 coord = { get_global_id(0), get_global_id(1) }; + + if(coord.x < get_image_width(image) && coord.y < get_image_height(image)){ + uint4 gray = { 15, 15, 15, 15 }; + write_imageui(image, coord, gray); + } + } + ); + + // build the program + compute::program draw_program = + compute::program::build_with_source(draw_walk_source, context); + + // fill image with dark gray + compute::kernel fill_kernel(draw_program, "fill_gray"); + fill_kernel.set_arg(0, image); + + queue.enqueue_nd_range_kernel( + fill_kernel, dim(0, 0), dim(width, height), dim(1, 1) + ); + + // draw points colored according to cluster + compute::kernel draw_kernel(draw_program, "draw_points"); + draw_kernel.set_arg(0, points); + draw_kernel.set_arg(1, clusters); + draw_kernel.set_arg(2, image); + queue.enqueue_1d_range_kernel(draw_kernel, 0, n_points, 0); + + // show image + compute::opencv_imshow("k-means", image, queue); + + // wait and return + cv::waitKey(0); + + return 0; +} diff --git a/src/boost/libs/compute/example/list_devices.cpp b/src/boost/libs/compute/example/list_devices.cpp new file mode 100644 index 00000000..b7bdbc94 --- /dev/null +++ b/src/boost/libs/compute/example/list_devices.cpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/core.hpp> + +namespace compute = boost::compute; + +int main() +{ + std::vector<compute::platform> platforms = compute::system::platforms(); + + for(size_t i = 0; i < platforms.size(); i++){ + const compute::platform &platform = platforms[i]; + + std::cout << "Platform '" << platform.name() << "'" << std::endl; + + std::vector<compute::device> devices = platform.devices(); + for(size_t j = 0; j < devices.size(); j++){ + const compute::device &device = devices[j]; + + std::string type; + if(device.type() & compute::device::gpu) + type = "GPU Device"; + else if(device.type() & compute::device::cpu) + type = "CPU Device"; + else if(device.type() & compute::device::accelerator) + type = "Accelerator Device"; + else + type = "Unknown Device"; + + std::cout << " " << type << ": " << device.name() << std::endl; + } + } + + return 0; +} diff --git a/src/boost/libs/compute/example/longest_vector.cpp b/src/boost/libs/compute/example/longest_vector.cpp new file mode 100644 index 00000000..faada332 --- /dev/null +++ b/src/boost/libs/compute/example/longest_vector.cpp @@ -0,0 +1,58 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <iterator> + +#include <boost/compute/algorithm/max_element.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/geometry.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace compute = boost::compute; + +// this example shows how to use the max_element() algorithm along with +// a transform_iterator and the length() function to find the longest +// 4-component vector in an array of vectors +int main() +{ + using compute::float4_; + + // vectors data + float data[] = { 1.0f, 2.0f, 3.0f, 0.0f, + 4.0f, 5.0f, 6.0f, 0.0f, + 7.0f, 8.0f, 9.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f }; + + // create device vector with the vector data + compute::vector<float4_> vector( + reinterpret_cast<float4_ *>(data), + reinterpret_cast<float4_ *>(data) + 4 + ); + + // find the longest vector + compute::vector<float4_>::const_iterator iter = + compute::max_element( + compute::make_transform_iterator( + vector.begin(), compute::length<float4_>() + ), + compute::make_transform_iterator( + vector.end(), compute::length<float4_>() + ) + ).base(); + + // print the index of the longest vector + std::cout << "longest vector index: " + << std::distance(vector.begin(), iter) + << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/mandelbrot.cpp b/src/boost/libs/compute/example/mandelbrot.cpp new file mode 100644 index 00000000..5ddb3687 --- /dev/null +++ b/src/boost/libs/compute/example/mandelbrot.cpp @@ -0,0 +1,224 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <algorithm> + +#include <QtGlobal> +#if QT_VERSION >= 0x050000 +#include <QtWidgets> +#else +#include <QtGui> +#endif +#include <QtOpenGL> + +#ifndef Q_MOC_RUN +#include <boost/compute/command_queue.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/interop/opengl.hpp> +#include <boost/compute/utility/dim.hpp> +#include <boost/compute/utility/source.hpp> +#endif // Q_MOC_RUN + +namespace compute = boost::compute; + +// opencl source code +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + // map value to color + float4 color(uint i) + { + uchar c = i; + uchar x = 35; + uchar y = 25; + uchar z = 15; + uchar max = 255; + + if(i == 256) + return (float4)(0, 0, 0, 255); + else + return (float4)(max-x*i, max-y*i, max-z*i, max) / 255.f; + } + + __kernel void mandelbrot(__write_only image2d_t image) + { + const uint x_coord = get_global_id(0); + const uint y_coord = get_global_id(1); + const uint width = get_global_size(0); + const uint height = get_global_size(1); + + float x_origin = ((float) x_coord / width) * 3.25f - 2.0f; + float y_origin = ((float) y_coord / height) * 2.5f - 1.25f; + + float x = 0.0f; + float y = 0.0f; + + uint i = 0; + while(x*x + y*y <= 4.f && i < 256){ + float tmp = x*x - y*y + x_origin; + y = 2*x*y + y_origin; + x = tmp; + i++; + } + + int2 coord = { x_coord, y_coord }; + write_imagef(image, coord, color(i)); + }; +); + +class MandelbrotWidget : public QGLWidget +{ + Q_OBJECT + +public: + MandelbrotWidget(QWidget *parent = 0); + ~MandelbrotWidget(); + + void initializeGL(); + void resizeGL(int width, int height); + void paintGL(); + void keyPressEvent(QKeyEvent* event); + +private: + compute::context context_; + compute::command_queue queue_; + compute::program program_; + GLuint gl_texture_; + compute::opengl_texture cl_texture_; +}; + +MandelbrotWidget::MandelbrotWidget(QWidget *parent) + : QGLWidget(parent) +{ + gl_texture_ = 0; +} + +MandelbrotWidget::~MandelbrotWidget() +{ +} + +void MandelbrotWidget::initializeGL() +{ + // setup opengl + glDisable(GL_LIGHTING); + + // create the OpenGL/OpenCL shared context + context_ = compute::opengl_create_shared_context(); + + // get gpu device + compute::device gpu = context_.get_device(); + std::cout << "device: " << gpu.name() << std::endl; + + // setup command queue + queue_ = compute::command_queue(context_, gpu); + + // build mandelbrot program + program_ = compute::program::create_with_source(source, context_); + program_.build(); +} + +void MandelbrotWidget::resizeGL(int width, int height) +{ +#if QT_VERSION >= 0x050000 + // scale height/width based on device pixel ratio + width /= windowHandle()->devicePixelRatio(); + height /= windowHandle()->devicePixelRatio(); +#endif + + // resize viewport + glViewport(0, 0, width, height); + + // delete old texture + if(gl_texture_){ + glDeleteTextures(1, &gl_texture_); + gl_texture_ = 0; + } + + // generate new texture + glGenTextures(1, &gl_texture_); + glBindTexture(GL_TEXTURE_2D, gl_texture_); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexImage2D( + GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0 + ); + + // create opencl object for the texture + cl_texture_ = compute::opengl_texture( + context_, GL_TEXTURE_2D, 0, gl_texture_, CL_MEM_WRITE_ONLY + ); +} + +void MandelbrotWidget::paintGL() +{ + using compute::dim; + + float w = width(); + float h = height(); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0.0, w, 0.0, h, -1.0, 1.0); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + // setup the mandelbrot kernel + compute::kernel kernel(program_, "mandelbrot"); + kernel.set_arg(0, cl_texture_); + + // acquire the opengl texture so it can be used in opencl + compute::opengl_enqueue_acquire_gl_objects(1, &cl_texture_.get(), queue_); + + // execute the mandelbrot kernel + queue_.enqueue_nd_range_kernel( + kernel, dim(0, 0), dim(width(), height()), dim(1, 1) + ); + + // release the opengl texture so it can be used by opengl + compute::opengl_enqueue_release_gl_objects(1, &cl_texture_.get(), queue_); + + // ensure opencl is finished before rendering in opengl + queue_.finish(); + + // draw a single quad with the mandelbrot image texture + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, gl_texture_); + + glBegin(GL_QUADS); + glTexCoord2f(0, 0); glVertex2f(0, 0); + glTexCoord2f(0, 1); glVertex2f(0, h); + glTexCoord2f(1, 1); glVertex2f(w, h); + glTexCoord2f(1, 0); glVertex2f(w, 0); + glEnd(); +} + +void MandelbrotWidget::keyPressEvent(QKeyEvent* event) +{ + if(event->key() == Qt::Key_Escape) { + this->close(); + } +} + +// the mandelbrot example shows how to create a mandelbrot image in +// OpenCL and render the image as a texture in OpenGL +int main(int argc, char *argv[]) +{ + QApplication app(argc, argv); + + MandelbrotWidget widget; + widget.show(); + + return app.exec(); +} + +#include "mandelbrot.moc" diff --git a/src/boost/libs/compute/example/mapped_view.cpp b/src/boost/libs/compute/example/mapped_view.cpp new file mode 100644 index 00000000..1e05107a --- /dev/null +++ b/src/boost/libs/compute/example/mapped_view.cpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <vector> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/container/mapped_view.hpp> + +namespace compute = boost::compute; + +// this example demonstrates how to use the mapped_view class to map +// an array of numbers to device memory and use the reduce() algorithm +// to calculate the sum. +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + // create data on host + int data[] = { 4, 2, 3, 7, 8, 9, 1, 6 }; + + // create mapped view on device + compute::mapped_view<int> view(data, 8, context); + + // use reduce() to calculate sum on the device + int sum = 0; + compute::reduce(view.begin(), view.end(), &sum, queue); + + // print the sum on the host + std::cout << "sum: " << sum << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/matrix_transpose.cpp b/src/boost/libs/compute/example/matrix_transpose.cpp new file mode 100644 index 00000000..ee9b1e9d --- /dev/null +++ b/src/boost/libs/compute/example/matrix_transpose.cpp @@ -0,0 +1,355 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <cstdlib> + +#include <boost/program_options.hpp> + +#include <boost/compute/core.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; +namespace po = boost::program_options; + +using compute::uint_; + +const uint_ TILE_DIM = 32; +const uint_ BLOCK_ROWS = 8; + +// generate a copy kernel program +compute::kernel make_copy_kernel(const compute::context& context) +{ + // source for the copy_kernel program + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void copy_kernel(__global const float *src, __global float *dst) + { + uint x = get_group_id(0) * TILE_DIM + get_local_id(0); + uint y = get_group_id(1) * TILE_DIM + get_local_id(1); + + uint width = get_num_groups(0) * TILE_DIM; + + for(uint i = 0 ; i < TILE_DIM ; i+= BLOCK_ROWS){ + dst[(y+i)*width +x] = src[(y+i)*width + x]; + } + } + ); + + // setup compilation flags for the copy program + std::stringstream options; + options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; + + // create and build the copy program + compute::program program = + compute::program::build_with_source(source, context, options.str()); + + // create and return the copy kernel + return program.create_kernel("copy_kernel"); +} + +// generate a naive transpose kernel +compute::kernel make_naive_transpose_kernel(const compute::context& context) +{ + // source for the naive_transpose kernel + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void naive_transpose(__global const float *src, __global float *dst) + { + uint x = get_group_id(0) * TILE_DIM + get_local_id(0); + uint y = get_group_id(1) * TILE_DIM + get_local_id(1); + + uint width = get_num_groups(0) * TILE_DIM; + + for(uint i = 0 ; i < TILE_DIM; i+= BLOCK_ROWS){ + dst[x*width + y+i] = src[(y+i)*width + x]; + } + } + ); + + // setup compilation flags for the naive_transpose program + std::stringstream options; + options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; + + // create and build the naive_transpose program + compute::program program = + compute::program::build_with_source(source, context, options.str()); + + // create and return the naive_transpose kernel + return program.create_kernel("naive_transpose"); +} + +// generates a coalesced transpose kernel +compute::kernel make_coalesced_transpose_kernel(const compute::context& context) +{ + // source for the coalesced_transpose kernel + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void coalesced_transpose(__global const float *src, __global float *dst) + { + __local float tile[TILE_DIM][TILE_DIM]; + + // compute indexes + uint x = get_group_id(0) * TILE_DIM + get_local_id(0); + uint y = get_group_id(1) * TILE_DIM + get_local_id(1); + + uint width = get_num_groups(0) * TILE_DIM; + + // load inside local memory + for(uint i = 0 ; i < TILE_DIM; i+= BLOCK_ROWS){ + tile[get_local_id(1)+i][get_local_id(0)] = src[(y+i)*width + x]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // transpose indexes + x = get_group_id(1) * TILE_DIM + get_local_id(0); + y = get_group_id(0) * TILE_DIM + get_local_id(1); + + // write output from local memory + for(uint i = 0 ; i < TILE_DIM ; i+=BLOCK_ROWS){ + dst[(y+i)*width + x] = tile[get_local_id(0)][get_local_id(1)+i]; + } + } + ); + + // setup compilation flags for the coalesced_transpose program + std::stringstream options; + options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; + + // create and build the coalesced_transpose program + compute::program program = + compute::program::build_with_source(source, context, options.str()); + + // create and return coalesced_transpose kernel + return program.create_kernel("coalesced_transpose"); +} + +// generate a coalesced withtout bank conflicts kernel +compute::kernel make_coalesced_no_bank_conflicts_kernel(const compute::context& context) +{ + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void coalesced_no_bank_conflicts(__global const float *src, __global float *dst) + { + // TILE_DIM+1 is here to avoid bank conflicts in local memory + __local float tile[TILE_DIM][TILE_DIM+1]; + + // compute indexes + uint x = get_group_id(0) * TILE_DIM + get_local_id(0); + uint y = get_group_id(1) * TILE_DIM + get_local_id(1); + + uint width = get_num_groups(0) * TILE_DIM; + + // load inside local memory + for(uint i = 0 ; i < TILE_DIM; i+= BLOCK_ROWS){ + tile[get_local_id(1)+i][get_local_id(0)] = src[(y+i)*width + x]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // transpose indexes + x = get_group_id(1) * TILE_DIM + get_local_id(0); + y = get_group_id(0) * TILE_DIM + get_local_id(1); + + // write output from local memory + for(uint i = 0 ; i < TILE_DIM ; i+=BLOCK_ROWS){ + dst[(y+i)*width + x] = tile[get_local_id(0)][get_local_id(1)+i]; + } + } + ); + + // setup compilation flags for the coalesced_no_bank_conflicts program + std::stringstream options; + options << "-DTILE_DIM=" << TILE_DIM << " -DBLOCK_ROWS=" << BLOCK_ROWS; + + // create and build the coalesced_no_bank_conflicts program + compute::program program = + compute::program::build_with_source(source, context, options.str()); + + // create and return the coalesced_no_bank_conflicts kernel + return program.create_kernel("coalesced_no_bank_conflicts"); +} + +// compare 'expectedResult' to 'transposedMatrix'. prints an error message if not equal. +bool check_transposition(const std::vector<float>& expectedResult, + uint_ size, + const std::vector<float>& transposedMatrix) +{ + for(uint_ i = 0 ; i < size ; ++i){ + if(expectedResult[i] != transposedMatrix[i]){ + std::cout << "idx = " << i << " , expected " << expectedResult[i] + << " , got " << transposedMatrix[i] << std::endl; + std::cout << "FAILED" << std::endl; + return false; + } + } + return true; +} + +// generate a matrix inside 'in' and do the tranposition inside 'out' +void generate_matrix(std::vector<float>& in, std::vector<float>& out, uint_ rows, uint_ cols) +{ + // generate a matrix + for(uint_ i = 0 ; i < rows ; ++i){ + for(uint_ j = 0 ; j < cols ; ++j){ + in[i*cols + j] = i*cols + j; + } + } + + // store transposed result + for(uint_ j = 0; j < cols ; ++j){ + for(uint_ i = 0 ; i < rows ; ++i){ + out[j*rows + i] = in[i*cols + j]; + } + } +} + +// neccessary for 64-bit integer on win32 +#ifdef _WIN32 +#define uint64_t unsigned __int64 +#endif + +int main(int argc, char *argv[]) +{ + // setup command line arguments + po::options_description options("options"); + options.add_options() + ("help", "show usage instructions") + ("rows", po::value<uint_>()->default_value(4096), "number of matrix rows") + ("cols", po::value<uint_>()->default_value(4096), "number of matrix columns") + ; + + // parse command line + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, options), vm); + po::notify(vm); + + // check command line arguments + if(vm.count("help")){ + std::cout << options << std::endl; + return 0; + } + + // get number rows and columns for the matrix + const uint_ rows = vm["rows"].as<uint_>(); + const uint_ cols = vm["cols"].as<uint_>(); + + // get the default device + compute::device device = compute::system::default_device(); + + // print out device name and matrix information + std::cout << "Device: " << device.name() << std::endl; + std::cout << "Matrix Size: " << rows << "x" << cols << std::endl; + std::cout << "Grid Size: " << rows/TILE_DIM << "x" << cols/TILE_DIM << " blocks" << std::endl; + std::cout << "Local Size: " << TILE_DIM << "x" << BLOCK_ROWS << " threads" << std::endl; + std::cout << std::endl; + + // On OSX this example does not work on CPU devices + #if defined(__APPLE__) + if(device.type() & compute::device::cpu) { + std::cout << "On OSX this example does not work on CPU devices" << std::endl; + return 0; + } + #endif + + const size_t global_work_size[2] = {rows, cols*BLOCK_ROWS/TILE_DIM}; + const size_t local_work_size[2] = {TILE_DIM, BLOCK_ROWS}; + + // setup input data on the host + const uint_ size = rows * cols; + std::vector<float> h_input(size); + std::vector<float> h_output(size); + std::vector<float> expectedResult(size); + generate_matrix(h_input, expectedResult, rows, cols); + + // create a context for the device + compute::context context(device); + + // device vectors + compute::vector<float> d_input(size, context); + compute::vector<float> d_output(size, context); + + // command_queue with profiling + compute::command_queue queue(context, device, compute::command_queue::enable_profiling); + + // copy input data + compute::copy(h_input.begin(), h_input.end(), d_input.begin(), queue); + + // simple copy kernel + std::cout << "Testing copy_kernel:" << std::endl; + compute::kernel kernel = make_copy_kernel(context); + kernel.set_arg(0, d_input); + kernel.set_arg(1, d_output); + + compute::event start; + start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + queue.finish(); + uint64_t elapsed = start.duration<boost::chrono::nanoseconds>().count(); + + std::cout << " Elapsed: " << elapsed << " ns" << std::endl; + std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; + compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); + + check_transposition(h_input, rows*cols, h_output); + std::cout << std::endl; + + // naive_transpose kernel + std::cout << "Testing naive_transpose:" << std::endl; + kernel = make_naive_transpose_kernel(context); + kernel.set_arg(0, d_input); + kernel.set_arg(1, d_output); + + start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + queue.finish(); + elapsed = start.duration<boost::chrono::nanoseconds>().count(); + std::cout << " Elapsed: " << elapsed << " ns" << std::endl; + std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; + compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); + + check_transposition(expectedResult, rows*cols, h_output); + std::cout << std::endl; + + // coalesced_transpose kernel + std::cout << "Testing coalesced_transpose:" << std::endl; + kernel = make_coalesced_transpose_kernel(context); + kernel.set_arg(0, d_input); + kernel.set_arg(1, d_output); + + start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + queue.finish(); + elapsed = start.duration<boost::chrono::nanoseconds>().count(); + std::cout << " Elapsed: " << elapsed << " ns" << std::endl; + std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; + + compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); + + check_transposition(expectedResult, rows*cols, h_output); + std::cout << std::endl; + + // coalesced_no_bank_conflicts kernel + std::cout << "Testing coalesced_no_bank_conflicts:" << std::endl; + + kernel = make_coalesced_no_bank_conflicts_kernel(context); + kernel.set_arg(0, d_input); + kernel.set_arg(1, d_output); + + start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + queue.finish(); + elapsed = start.duration<boost::chrono::nanoseconds>().count(); + std::cout << " Elapsed: " << elapsed << " ns" << std::endl; + std::cout << " BandWidth: " << 2*rows*cols*sizeof(float) / elapsed << " GB/s" << std::endl; + + compute::copy(d_output.begin(), d_output.end(), h_output.begin(), queue); + + check_transposition(expectedResult, rows*cols, h_output); + std::cout << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/memory_limits.cpp b/src/boost/libs/compute/example/memory_limits.cpp new file mode 100644 index 00000000..8d0e4a7a --- /dev/null +++ b/src/boost/libs/compute/example/memory_limits.cpp @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/core.hpp> + +namespace compute = boost::compute; + +int main() +{ + // get the default device + compute::device device = compute::system::default_device(); + + std::cout << "device: " << device.name() << std::endl; + std::cout << " global memory size: " + << device.get_info<cl_ulong>(CL_DEVICE_GLOBAL_MEM_SIZE) / 1024 / 1024 + << " MB" + << std::endl; + std::cout << " local memory size: " + << device.get_info<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE) / 1024 + << " KB" + << std::endl; + std::cout << " constant memory size: " + << device.get_info<cl_ulong>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) / 1024 + << " KB" + << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/monte_carlo.cpp b/src/boost/libs/compute/example/monte_carlo.cpp new file mode 100644 index 00000000..8ae26209 --- /dev/null +++ b/src/boost/libs/compute/example/monte_carlo.cpp @@ -0,0 +1,73 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/function.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/count_if.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/random/default_random_engine.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace compute = boost::compute; + +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + std::cout << "device: " << gpu.name() << std::endl; + + using compute::uint_; + using compute::uint2_; + + +#ifdef CI_BUILD // lower number of points for CI builds + size_t n = 2000; +#else + // ten million random points + size_t n = 10000000; +#endif + + // generate random numbers + compute::default_random_engine rng(queue); + compute::vector<uint_> vector(n * 2, context); + rng.generate(vector.begin(), vector.end(), queue); + + // function returing true if the point is within the unit circle + BOOST_COMPUTE_FUNCTION(bool, is_in_unit_circle, (const uint2_ point), + { + const float x = point.x / (float) UINT_MAX - 1; + const float y = point.y / (float) UINT_MAX - 1; + + return (x*x + y*y) < 1.0f; + }); + + // iterate over vector<uint> as vector<uint2> + compute::buffer_iterator<uint2_> start = + compute::make_buffer_iterator<uint2_>(vector.get_buffer(), 0); + compute::buffer_iterator<uint2_> end = + compute::make_buffer_iterator<uint2_>(vector.get_buffer(), vector.size() / 2); + + // count number of random points within the unit circle + size_t count = compute::count_if(start, end, is_in_unit_circle, queue); + + // print out values + float count_f = static_cast<float>(count); + std::cout << "count: " << count << " / " << n << std::endl; + std::cout << "ratio: " << count_f / float(n) << std::endl; + std::cout << "pi = " << (count_f / float(n)) * 4.0f << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/nbody.cpp b/src/boost/libs/compute/example/nbody.cpp new file mode 100644 index 00000000..9379c63b --- /dev/null +++ b/src/boost/libs/compute/example/nbody.cpp @@ -0,0 +1,236 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Fabian Köhler <fabian2804@googlemail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#define GL_GLEXT_PROTOTYPES +#ifdef __APPLE__ +#include <OpenGL/gl.h> +#include <OpenGL/glext.h> +#else +#include <GL/gl.h> +#include <GL/glext.h> +#endif + +#include <QtGlobal> +#if QT_VERSION >= 0x050000 +#include <QtWidgets> +#else +#include <QtGui> +#endif +#include <QtOpenGL> +#include <QTimer> + +#include <boost/program_options.hpp> +#include <boost/random/uniform_real_distribution.hpp> +#include <boost/random/mersenne_twister.hpp> + +#ifndef Q_MOC_RUN +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/interop/opengl.hpp> +#include <boost/compute/utility/source.hpp> +#endif // Q_MOC_RUN + +namespace compute = boost::compute; +namespace po = boost::program_options; + +using compute::uint_; +using compute::float4_; + +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void updateVelocity(__global const float4* position, __global float4* velocity, float dt, uint N) + { + uint gid = get_global_id(0); + + float4 r = { 0.0f, 0.0f, 0.0f, 0.0f }; + float f = 0.0f; + for(uint i = 0; i != gid; i++) { + if(i != gid) { + r = position[i]-position[gid]; + f = length(r)+0.001f; + f *= f*f; + f = dt/f; + velocity[gid] += f*r; + } + } + } + __kernel void updatePosition(__global float4* position, __global const float4* velocity, float dt) + { + uint gid = get_global_id(0); + + position[gid].xyz += dt*velocity[gid].xyz; + } +); + +class NBodyWidget : public QGLWidget +{ + Q_OBJECT + +public: + NBodyWidget(std::size_t particles, float dt, QWidget* parent = 0); + ~NBodyWidget(); + + void initializeGL(); + void resizeGL(int width, int height); + void paintGL(); + void updateParticles(); + void keyPressEvent(QKeyEvent* event); + +private: + QTimer* timer; + + compute::context m_context; + compute::command_queue m_queue; + compute::program m_program; + compute::opengl_buffer m_position; + compute::vector<float4_>* m_velocity; + compute::kernel m_velocity_kernel; + compute::kernel m_position_kernel; + + bool m_initial_draw; + + const uint_ m_particles; + const float m_dt; +}; + +NBodyWidget::NBodyWidget(std::size_t particles, float dt, QWidget* parent) + : QGLWidget(parent), m_initial_draw(true), m_particles(particles), m_dt(dt) +{ + // create a timer to redraw as fast as possible + timer = new QTimer(this); + connect(timer, SIGNAL(timeout()), this, SLOT(updateGL())); + timer->start(1); +} + +NBodyWidget::~NBodyWidget() +{ + delete m_velocity; + + // delete the opengl buffer + GLuint vbo = m_position.get_opengl_object(); + glDeleteBuffers(1, &vbo); +} + +void NBodyWidget::initializeGL() +{ + // create context, command queue and program + m_context = compute::opengl_create_shared_context(); + m_queue = compute::command_queue(m_context, m_context.get_device()); + m_program = compute::program::create_with_source(source, m_context); + m_program.build(); + + // prepare random particle positions that will be transferred to the vbo + float4_* temp = new float4_[m_particles]; + boost::random::uniform_real_distribution<float> dist(-0.5f, 0.5f); + boost::random::mt19937_64 gen; + for(size_t i = 0; i < m_particles; i++) { + temp[i][0] = dist(gen); + temp[i][1] = dist(gen); + temp[i][2] = dist(gen); + temp[i][3] = 1.0f; + } + + // create an OpenGL vbo + GLuint vbo = 0; + glGenBuffers(1, &vbo); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, m_particles*sizeof(float4_), temp, GL_DYNAMIC_DRAW); + + // create a OpenCL buffer from the vbo + m_position = compute::opengl_buffer(m_context, vbo); + delete[] temp; + + // create buffer for velocities + m_velocity = new compute::vector<float4_>(m_particles, m_context); + compute::fill(m_velocity->begin(), m_velocity->end(), float4_(0.0f, 0.0f, 0.0f, 0.0f), m_queue); + + // create compute kernels + m_velocity_kernel = m_program.create_kernel("updateVelocity"); + m_velocity_kernel.set_arg(0, m_position); + m_velocity_kernel.set_arg(1, m_velocity->get_buffer()); + m_velocity_kernel.set_arg(2, m_dt); + m_velocity_kernel.set_arg(3, m_particles); + m_position_kernel = m_program.create_kernel("updatePosition"); + m_position_kernel.set_arg(0, m_position); + m_position_kernel.set_arg(1, m_velocity->get_buffer()); + m_position_kernel.set_arg(2, m_dt); +} +void NBodyWidget::resizeGL(int width, int height) +{ + // update viewport + glViewport(0, 0, width, height); +} +void NBodyWidget::paintGL() +{ + // clear buffer + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + // check if this is the first draw + if(m_initial_draw) { + // do not update particles + m_initial_draw = false; + } else { + // update particles + updateParticles(); + } + + // draw + glVertexPointer(4, GL_FLOAT, 0, 0); + glEnableClientState(GL_VERTEX_ARRAY); + glDrawArrays(GL_POINTS, 0, m_particles); + glFinish(); +} +void NBodyWidget::updateParticles() +{ + // enqueue kernels to update particles and make sure that the command queue is finished + compute::opengl_enqueue_acquire_buffer(m_position, m_queue); + m_queue.enqueue_1d_range_kernel(m_velocity_kernel, 0, m_particles, 0).wait(); + m_queue.enqueue_1d_range_kernel(m_position_kernel, 0, m_particles, 0).wait(); + m_queue.finish(); + compute::opengl_enqueue_release_buffer(m_position, m_queue); +} +void NBodyWidget::keyPressEvent(QKeyEvent* event) +{ + if(event->key() == Qt::Key_Escape) { + this->close(); + } +} + +int main(int argc, char** argv) +{ + // parse command line arguments + po::options_description options("options"); + options.add_options() + ("help", "show usage") + ("particles", po::value<uint_>()->default_value(1000), "number of particles") + ("dt", po::value<float>()->default_value(0.00001f), "width of each integration step"); + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, options), vm); + po::notify(vm); + + if(vm.count("help") > 0) { + std::cout << options << std::endl; + return 0; + } + + const uint_ particles = vm["particles"].as<uint_>(); + const float dt = vm["dt"].as<float>(); + + QApplication app(argc, argv); + NBodyWidget nbody(particles, dt); + + nbody.show(); + + return app.exec(); +} + +#include "nbody.moc" diff --git a/src/boost/libs/compute/example/opencl_test.cpp b/src/boost/libs/compute/example/opencl_test.cpp new file mode 100644 index 00000000..15b85202 --- /dev/null +++ b/src/boost/libs/compute/example/opencl_test.cpp @@ -0,0 +1,165 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +// See boost/compute/detail/diagnostic.hpp +// GCC +#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 +#define BOOST_COMPUTE_GCC_DIAG_STR(s) #s +#define BOOST_COMPUTE_GCC_DIAG_JOINSTR(x,y) BOOST_COMPUTE_GCC_DIAG_STR(x ## y) +# define BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(x) _Pragma (#x) +# define BOOST_COMPUTE_GCC_DIAG_PRAGMA(x) BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(GCC diagnostic x) +# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406 +# define BOOST_COMPUTE_GCC_DIAG_OFF(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(push) \ + BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x)) +# define BOOST_COMPUTE_GCC_DIAG_ON(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(pop) +# else +# define BOOST_COMPUTE_GCC_DIAG_OFF(x) \ + BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x)) +# define BOOST_COMPUTE_GCC_DIAG_ON(x) \ + BOOST_COMPUTE_GCC_DIAG_PRAGMA(warning BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x)) +# endif +#else // Ensure these macros do nothing for other compilers. +# define BOOST_COMPUTE_GCC_DIAG_OFF(x) +# define BOOST_COMPUTE_GCC_DIAG_ON(x) +#endif + +// Clang +#ifdef __clang__ +# define BOOST_COMPUTE_CLANG_DIAG_STR(s) # s +// stringize s to "no-sign-compare" +# define BOOST_COMPUTE_CLANG_DIAG_JOINSTR(x,y) BOOST_COMPUTE_CLANG_DIAG_STR(x ## y) +// join -W with no-unused-variable to "-Wno-sign-compare" +# define BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(x) _Pragma (#x) +// _Pragma is unary operator #pragma ("") +# define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x) \ + BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(clang diagnostic x) +# define BOOST_COMPUTE_CLANG_DIAG_OFF(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(push) \ + BOOST_COMPUTE_CLANG_DIAG_PRAGMA(ignored BOOST_COMPUTE_CLANG_DIAG_JOINSTR(-W,x)) +// For example: #pragma clang diagnostic ignored "-Wno-sign-compare" +# define BOOST_COMPUTE_CLANG_DIAG_ON(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(pop) +// For example: #pragma clang diagnostic warning "-Wno-sign-compare" +#else // Ensure these macros do nothing for other compilers. +# define BOOST_COMPUTE_CLANG_DIAG_OFF(x) +# define BOOST_COMPUTE_CLANG_DIAG_ON(x) +# define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x) +#endif + +// MSVC +#if defined(_MSC_VER) +# define BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(x) __pragma(x) +# define BOOST_COMPUTE_MSVC_DIAG_PRAGMA(x) \ + BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(warning(x)) +# define BOOST_COMPUTE_MSVC_DIAG_OFF(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(push) \ + BOOST_COMPUTE_MSVC_DIAG_PRAGMA(disable: x) +# define BOOST_COMPUTE_MSVC_DIAG_ON(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(pop) +#else // Ensure these macros do nothing for other compilers. +# define BOOST_COMPUTE_MSVC_DIAG_OFF(x) +# define BOOST_COMPUTE_MSVC_DIAG_ON(x) +#endif + +#include <iostream> + +// include the proper opencl header for the system +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +// the opencl_test example displays the opencl platforms and devices found +// on the system using the opencl api directly. if this test fails to compile +// and/or run, there is a problem with the opencl implementation found on the +// system. users should ensure this test runs successfuly before using any of +// the boost.compute apis (which depend on a working opencl implementation). +int main() +{ + // Suppress deprecated declarations warning + BOOST_COMPUTE_MSVC_DIAG_OFF(4996); // MSVC + BOOST_COMPUTE_GCC_DIAG_OFF(deprecated-declarations); // GCC + BOOST_COMPUTE_CLANG_DIAG_OFF(deprecated-declarations); // Clang + + // query number of opencl platforms + cl_uint num_platforms = 0; + cl_int ret = clGetPlatformIDs(0, NULL, &num_platforms); + if(ret != CL_SUCCESS){ + std::cerr << "failed to query platforms: " << ret << std::endl; + return -1; + } + + // check that at least one platform was found + if(num_platforms == 0){ + std::cerr << "found 0 platforms" << std::endl; + return 0; + } + + // get platform ids + cl_platform_id *platforms = new cl_platform_id[num_platforms]; + clGetPlatformIDs(num_platforms, platforms, NULL); + + // iterate through each platform and query its devices + for(cl_uint i = 0; i < num_platforms; i++){ + cl_platform_id platform = platforms[i]; + + // query number of opencl devices + cl_uint num_devices = 0; + ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + if(ret != CL_SUCCESS){ + std::cerr << "failed to lookup devices for platform " << i << std::endl; + continue; + } + + // print number of devices found + std::cout << "platform " << i << " has " << num_devices << " devices:" << std::endl; + + // get device ids for the platform + cl_device_id *devices = new cl_device_id[num_devices]; + ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + if(ret != CL_SUCCESS){ + std::cerr << "failed to query platform devices" << std::endl; + delete[] devices; + continue; + } + + // iterate through each device on the platform and print its name + for(cl_uint j = 0; j < num_devices; j++){ + cl_device_id device = devices[j]; + + // get length of the device name string + size_t name_length = 0; + ret = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &name_length); + if(ret != CL_SUCCESS){ + std::cerr << "failed to query device name length for device " << j << std::endl; + continue; + } + + // get the device name string + char *name = new char[name_length]; + ret = clGetDeviceInfo(device, CL_DEVICE_NAME, name_length, name, NULL); + if(ret != CL_SUCCESS){ + std::cerr << "failed to query device name string for device " << j << std::endl; + delete[] name; + continue; + } + + // print out the device name + std::cout << " device: " << name << std::endl; + + delete[] name; + } + delete[] devices; + } + delete[] platforms; + + BOOST_COMPUTE_CLANG_DIAG_ON(deprecated-declarations); // Clang + BOOST_COMPUTE_GCC_DIAG_ON(deprecated-declarations); // GCC + BOOST_COMPUTE_MSVC_DIAG_OFF(4996); // MSVC + + return 0; +} diff --git a/src/boost/libs/compute/example/opencv_convolution.cpp b/src/boost/libs/compute/example/opencv_convolution.cpp new file mode 100644 index 00000000..7ba53436 --- /dev/null +++ b/src/boost/libs/compute/example/opencv_convolution.cpp @@ -0,0 +1,265 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Mageswaran.D <mageswaran1989@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <string> + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/compute/utility/source.hpp> + +#include <boost/program_options.hpp> + +namespace compute = boost::compute; +namespace po = boost::program_options; + +// Create convolution program +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( + __kernel void convolution(__read_only image2d_t sourceImage, + __write_only image2d_t outputImage, + __constant float* filter, + int filterWidth) + { + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST; + + // Store each work-item's unique row and column + int x = get_global_id(0); + int y = get_global_id(1); + + // Half the width of the filter is needed for indexing + // memory later + int halfWidth = (int)(filterWidth/2); + + // All accesses to images return data as four-element vector + // (i.e., float4). + float4 sum = {0.0f, 0.0f, 0.0f, 0.0f}; + + // Iterator for the filter + int filterIdx = 0; + + // Each work-item iterates around its local area based on the + // size of the filter + int2 coords; // Coordinates for accessing the image + + // Iterate the filter rows + for(int i = -halfWidth; i <= halfWidth; i++) + { + coords.y = y + i; + + // Iterate over the filter columns + for(int j = -halfWidth; j <= halfWidth; j++) + { + coords.x = x + j; + + float4 pixel; + + // Read a pixel from the image. + // Work on a channel + pixel = read_imagef(sourceImage, sampler, coords); + sum.x += pixel.x * filter[filterIdx++]; + //sum.y += pixel.y * filter[filterIdx++]; + //sum.z += pixel.z * filter[filterIdx++]; + } + } + + barrier(CLK_GLOBAL_MEM_FENCE); + // Copy the data to the output image if the + // work-item is in bounds + if(y < get_image_height(sourceImage) && + x < get_image_width(sourceImage)) + { + coords.x = x; + coords.y = y; + + //Same channel is copied in all three channels + //write_imagef(outputImage, coords, + // (float4)(sum.x,sum.x,sum.x,1.0f)); + + write_imagef(outputImage, coords, sum); + } + } +); + +// This example shows how to read two images or use camera +// with OpenCV, transfer the frames to the GPU, +// and apply a convolution written in OpenCL +int main(int argc, char *argv[]) +{ + /////////////////////////////////////////////////////////////////////////// + + // setup the command line arguments + po::options_description desc; + desc.add_options() + ("help", "show available options") + ("camera", po::value<int>()->default_value(-1), + "if not default camera, specify a camera id") + ("image", po::value<std::string>(), "path to image file"); + + // Parse the command lines + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //check the command line arguments + if(vm.count("help")) + { + std::cout << desc << std::endl; + return 0; + } + + /////////////////////////////////////////////////////////////////////////// + + //OpenCV variables + cv::Mat cv_mat; + cv::VideoCapture cap; //OpenCV camera handle. + + //Filter Variables + float filter[9] = { + -1.0, 0.0, 1.0, + -2.0, 0.0, 2.0, + -1.0, 0.0, 1.0, + }; + + // The convolution filter is 3x3 + int filterWidth = 3; + + //OpenCL variables + // Get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + compute::buffer dev_filter(context, sizeof(filter), + compute::memory_object::read_only | + compute::memory_object::copy_host_ptr, + filter); + + compute::program filter_program = + compute::program::create_with_source(source, context); + + try + { + filter_program.build(); + } + catch(compute::opencl_error e) + { + std::cout<<"Build Error: "<<std::endl + <<filter_program.build_log(); + return -1; + } + + // create fliter kernel and set arguments + compute::kernel filter_kernel(filter_program, "convolution"); + + /////////////////////////////////////////////////////////////////////////// + + //check for image paths + if(vm.count("image")) + { + // Read image with OpenCV + cv_mat = cv::imread(vm["image"].as<std::string>(), + CV_LOAD_IMAGE_COLOR); + if(!cv_mat.data){ + std::cerr << "Failed to load image" << std::endl; + return -1; + } + } + else //by default use camera + { + //open camera + cap.open(vm["camera"].as<int>()); + // read first frame + cap >> cv_mat; + if(!cv_mat.data){ + std::cerr << "failed to capture frame" << std::endl; + return -1; + } + } + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); + + // Transfer image/frame data to gpu + compute::image2d dev_input_image = + compute::opencv_create_image2d_with_mat( + cv_mat, compute::image2d::read_write, queue + ); + + // Create output image + // Be sure what will be your ouput image/frame size + compute::image2d dev_output_image( + context, + dev_input_image.width(), + dev_input_image.height(), + dev_input_image.format(), + compute::image2d::write_only + ); + + filter_kernel.set_arg(0, dev_input_image); + filter_kernel.set_arg(1, dev_output_image); + filter_kernel.set_arg(2, dev_filter); + filter_kernel.set_arg(3, filterWidth); + + // run flip kernel + size_t origin[2] = { 0, 0 }; + size_t region[2] = { dev_input_image.width(), + dev_input_image.height() }; + + /////////////////////////////////////////////////////////////////////////// + + queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); + + //check for image paths + if(vm.count("image")) + { + // show host image + cv::imshow("Original Image", cv_mat); + + // show gpu image + compute::opencv_imshow("Convoluted Image", dev_output_image, queue); + + // wait and return + cv::waitKey(0); + } + else + { + char key = '\0'; + while(key != 27) //check for escape key + { + cap >> cv_mat; + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); + + // Update the device image memory with current frame data + compute::opencv_copy_mat_to_image(cv_mat, + dev_input_image,queue); + + // Run the kernel on the device + queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); + + // Show host image + cv::imshow("Camera Frame", cv_mat); + + // Show GPU image + compute::opencv_imshow("Convoluted Frame", dev_output_image, queue); + + // wait + key = cv::waitKey(10); + } + } + return 0; +} diff --git a/src/boost/libs/compute/example/opencv_flip.cpp b/src/boost/libs/compute/example/opencv_flip.cpp new file mode 100644 index 00000000..8cc26ef2 --- /dev/null +++ b/src/boost/libs/compute/example/opencv_flip.cpp @@ -0,0 +1,101 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +// this example shows how to read an image with OpenCV, transfer the +// image to the GPU, and apply a simple flip filter written in OpenCL +int main(int argc, char *argv[]) +{ + // check command line + if(argc < 2){ + std::cerr << "usage: " << argv[0] << " FILENAME" << std::endl; + return -1; + } + + // read image with opencv + cv::Mat cv_image = cv::imread(argv[1], CV_LOAD_IMAGE_COLOR); + if(!cv_image.data){ + std::cerr << "failed to load image" << std::endl; + return -1; + } + + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + // convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(cv_image, cv_image, CV_BGR2BGRA); + + // transfer image to gpu + compute::image2d input_image = + compute::opencv_create_image2d_with_mat( + cv_image, compute::image2d::read_write, queue + ); + + // create output image + compute::image2d output_image( + context, + input_image.width(), + input_image.height(), + input_image.format(), + compute::image2d::write_only + ); + + // create flip program + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void flip_kernel(__read_only image2d_t input, + __write_only image2d_t output) + { + const sampler_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int height = get_image_height(input); + int2 input_coord = { get_global_id(0), get_global_id(1) }; + int2 output_coord = { input_coord.x, height - input_coord.y - 1 }; + float4 value = read_imagef(input, sampler, input_coord); + write_imagef(output, output_coord, value); + } + ); + + compute::program flip_program = + compute::program::create_with_source(source, context); + flip_program.build(); + + // create flip kernel and set arguments + compute::kernel flip_kernel(flip_program, "flip_kernel"); + flip_kernel.set_arg(0, input_image); + flip_kernel.set_arg(1, output_image); + + // run flip kernel + size_t origin[2] = { 0, 0 }; + size_t region[2] = { input_image.width(), input_image.height() }; + queue.enqueue_nd_range_kernel(flip_kernel, 2, origin, region, 0); + + // show host image + cv::imshow("opencv image", cv_image); + + // show gpu image + compute::opencv_imshow("filtered image", output_image, queue); + + // wait and return + cv::waitKey(0); + return 0; +} diff --git a/src/boost/libs/compute/example/opencv_histogram.cpp b/src/boost/libs/compute/example/opencv_histogram.cpp new file mode 100644 index 00000000..e339030b --- /dev/null +++ b/src/boost/libs/compute/example/opencv_histogram.cpp @@ -0,0 +1,228 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Mageswaran.D <mageswaran1989@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//Code sample for calculating histogram using OpenCL and +//displaying image histogram in OpenCV. + +#include <iostream> +#include <string> + +#include <opencv2/imgproc/imgproc.hpp> +#include <opencv2/highgui/highgui.hpp> + +#include <boost/compute/source.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/program_options.hpp> + +namespace compute = boost::compute; +namespace po = boost::program_options; + +// number of bins +int histSize = 256; + +// Set the ranges ( for B,G,R) ) +// TryOut: consider the range in kernel calculation +float range[] = { 0, 256 } ; +const float* histRange = { range }; + +// Create naive histogram program +// Needs "cl_khr_local_int32_base_atomics" extension +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( + __kernel void histogram(read_only image2d_t src_image, + __global int* b_hist, + __global int* g_hist, + __global int* r_hist) + { + sampler_t sampler =( CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST | + CLK_ADDRESS_CLAMP_TO_EDGE); + + int image_width = get_image_width(src_image); + int image_height = get_image_height(src_image); + + int2 coords = (int2)(get_global_id(0), get_global_id(1)); + float4 pixel = read_imagef(src_image,sampler, coords); + + //boundary condition + if ((coords.x < image_width) && (coords.y < image_height)) + { + uchar indx_x, indx_y, indx_z; + indx_x = convert_uchar_sat(pixel.x * 255.0f); + indx_y = convert_uchar_sat(pixel.y * 255.0f); + indx_z = convert_uchar_sat(pixel.z * 255.0f); + + atomic_inc(&b_hist[(uint)indx_z]); + atomic_inc(&g_hist[(uint)indx_y]); + atomic_inc(&r_hist[(uint)indx_x]); + } + } +); + +inline void showHistogramWindow(cv::Mat &b_hist, cv::Mat &g_hist, cv::Mat &r_hist, + std::string window_name) +{ + // Draw the histograms for B, G and R + int hist_w = 1024; + int hist_h = 768; + int bin_w = cvRound((double)hist_w/histSize); + + cv::Mat histImage(hist_h, hist_w, CV_8UC3, cv::Scalar(0,0,0)); + + // Normalize the result to [ 0, histImage.rows ] + cv::normalize(b_hist, b_hist, 0, histImage.rows, cv::NORM_MINMAX, -1, cv::Mat()); + cv::normalize(g_hist, g_hist, 0, histImage.rows, cv::NORM_MINMAX, -1, cv::Mat()); + cv::normalize(r_hist, r_hist, 0, histImage.rows, cv::NORM_MINMAX, -1, cv::Mat()); + + // Draw for each channel + for (int i = 1; i < histSize; i++ ) + { + cv::line(histImage, + cv::Point(bin_w*(i-1), hist_h - cvRound(b_hist.at<float>(i-1))), + cv::Point(bin_w*(i), hist_h - cvRound(b_hist.at<float>(i))), + cv::Scalar(255, 0, 0), + 2, + 8, + 0); + + cv::line(histImage, + cv::Point(bin_w*(i-1), hist_h - cvRound(g_hist.at<float>(i-1))), + cv::Point(bin_w*(i), hist_h - cvRound(g_hist.at<float>(i))), + cv::Scalar(0, 255, 0), + 2, + 8, + 0); + + cv::line(histImage, + cv::Point( bin_w*(i-1), hist_h - cvRound(r_hist.at<float>(i-1))), + cv::Point( bin_w*(i), hist_h - cvRound(r_hist.at<float>(i)) ), + cv::Scalar( 0, 0, 255), + 2, + 8, + 0); + } + + // Display + cv::namedWindow(window_name, CV_WINDOW_AUTOSIZE ); + cv::imshow(window_name, histImage ); +} + +//Get the device context +//Create GPU array/vector +//Copy the image & set up the kernel +//Execute the kernel +//Copy GPU data back to CPU cv::Mat data pointer +//OpenCV conversion for convienient display +void calculateHistogramUsingCL(cv::Mat src, compute::command_queue &queue) +{ + compute::context context = queue.get_context(); + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(src, src, CV_BGR2BGRA); + + //3 channels & 256 bins : alpha channel is ignored + compute::vector<int> gpu_b_hist(histSize, context); + compute::vector<int> gpu_g_hist(histSize, context); + compute::vector<int> gpu_r_hist(histSize, context); + + // Transfer image to gpu + compute::image2d gpu_src = + compute::opencv_create_image2d_with_mat( + src, compute::image2d::read_only, + queue + ); + + compute::program histogram_program = + compute::program::create_with_source(source, context); + histogram_program.build(); + + // create histogram kernel and set arguments + compute::kernel histogram_kernel(histogram_program, "histogram"); + histogram_kernel.set_arg(0, gpu_src); + histogram_kernel.set_arg(1, gpu_b_hist.get_buffer()); + histogram_kernel.set_arg(2, gpu_g_hist.get_buffer()); + histogram_kernel.set_arg(3, gpu_r_hist.get_buffer()); + + // run histogram kernel + // each kernel thread updating red, green & blue bins + size_t origin[2] = { 0, 0 }; + size_t region[2] = { gpu_src.width(), + gpu_src.height() }; + + queue.enqueue_nd_range_kernel(histogram_kernel, 2, origin, region, 0); + + //Make sure kernel get executed and data copied back + queue.finish(); + + //create Mat and copy GPU bins to CPU memory + cv::Mat b_hist(256, 1, CV_32SC1); + compute::copy(gpu_b_hist.begin(), gpu_b_hist.end(), b_hist.data, queue); + cv::Mat g_hist(256, 1, CV_32SC1); + compute::copy(gpu_g_hist.begin(), gpu_g_hist.end(), g_hist.data, queue); + cv::Mat r_hist(256, 1, CV_32SC1); + compute::copy(gpu_r_hist.begin(), gpu_r_hist.end(), r_hist.data, queue); + + b_hist.convertTo(b_hist, CV_32FC1); //converted for displaying + g_hist.convertTo(g_hist, CV_32FC1); + r_hist.convertTo(r_hist, CV_32FC1); + + showHistogramWindow(b_hist, g_hist, r_hist, "Histogram"); +} + +int main( int argc, char** argv ) +{ + // Get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + cv::Mat src; + + // setup the command line arguments + po::options_description desc; + desc.add_options() + ("help", "show available options") + ("image", po::value<std::string>(), "path to image file"); + + // Parse the command lines + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //check the command line arguments + if(vm.count("help")) + { + std::cout << desc << std::endl; + return 0; + } + + //check for image paths + if(vm.count("image")) + { + // Read image with OpenCV + src = cv::imread(vm["image"].as<std::string>(), + CV_LOAD_IMAGE_COLOR); + if(!src.data){ + std::cerr << "Failed to load image" << std::endl; + return -1; + } + calculateHistogramUsingCL(src, queue); + cv::imshow("Image", src); + cv::waitKey(0); + } + else + { + std::cout << desc << std::endl; + return 0; + } + return 0; +} diff --git a/src/boost/libs/compute/example/opencv_optical_flow.cpp b/src/boost/libs/compute/example/opencv_optical_flow.cpp new file mode 100644 index 00000000..87f330ae --- /dev/null +++ b/src/boost/libs/compute/example/opencv_optical_flow.cpp @@ -0,0 +1,289 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Mageswaran.D <mageswaran1989@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <string> + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/compute/utility/source.hpp> + +#include <boost/program_options.hpp> + +namespace compute = boost::compute; +namespace po = boost::program_options; + +// Create naive optical flow program +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( + const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE; + + __kernel void optical_flow ( + read_only + image2d_t current_image, + read_only image2d_t previous_image, + write_only image2d_t optical_flow, + const float scale, + const float offset, + const float lambda, + const float threshold ) + { + int2 coords = (int2)(get_global_id(0), get_global_id(1)); + float4 current_pixel = read_imagef(current_image, + sampler, + coords); + float4 previous_pixel = read_imagef(previous_image, + sampler, + coords); + int2 x1 = (int2)(offset, 0.f); + int2 y1 = (int2)(0.f, offset); + + //get the difference + float4 curdif = previous_pixel - current_pixel; + + //calculate the gradient + //Image 2 first + float4 gradx = read_imagef(previous_image, + sampler, + coords+x1) - + read_imagef(previous_image, + sampler, + coords-x1); + //Image 1 + gradx += read_imagef(current_image, + sampler, + coords+x1) - + read_imagef(current_image, + sampler, + coords-x1); + //Image 2 first + float4 grady = read_imagef(previous_image, + sampler, + coords+y1) - + read_imagef(previous_image, + sampler, + coords-y1); + //Image 1 + grady += read_imagef(current_image, + sampler, + coords+y1) - + read_imagef(current_image, + sampler, + coords-y1); + + float4 sqr = (gradx*gradx) + (grady*grady) + + (float4)(lambda,lambda, lambda, lambda); + float4 gradmag = sqrt(sqr); + + /////////////////////////////////////////////////// + float4 vx = curdif * (gradx / gradmag); + float vxd = vx.x;//assumes greyscale + + //format output for flowrepos, out(-x,+x,-y,+y) + float2 xout = (float2)(fmax(vxd,0.f),fabs(fmin(vxd,0.f))); + xout *= scale; + /////////////////////////////////////////////////// + float4 vy = curdif*(grady/gradmag); + float vyd = vy.x;//assumes greyscale + + //format output for flowrepos, out(-x,+x,-y,+y) + float2 yout = (float2)(fmax(vyd,0.f),fabs(fmin(vyd,0.f))); + yout *= scale; + /////////////////////////////////////////////////// + + float4 out = (float4)(xout, yout); + float cond = (float)isgreaterequal(length(out), threshold); + out *= cond; + + write_imagef(optical_flow, coords, out); + } +); + +// This example shows how to read two images or use camera +// with OpenCV, transfer the frames to the GPU, +// and apply a naive optical flow algorithm +// written in OpenCL +int main(int argc, char *argv[]) +{ + // setup the command line arguments + po::options_description desc; + desc.add_options() + ("help", "show available options") + ("camera", po::value<int>()->default_value(-1), + "if not default camera, specify a camera id") + ("image1", po::value<std::string>(), "path to image file 1") + ("image2", po::value<std::string>(), "path to image file 2"); + + // Parse the command lines + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //check the command line arguments + if(vm.count("help")) + { + std::cout << desc << std::endl; + return 0; + } + + //OpenCV variables + cv::Mat previous_cv_image; + cv::Mat current_cv_image; + cv::VideoCapture cap; //OpenCV camera handle + + //check for image paths + if(vm.count("image1") && vm.count("image2")) + { + // Read image 1 with OpenCV + previous_cv_image = cv::imread(vm["image1"].as<std::string>(), + CV_LOAD_IMAGE_COLOR); + if(!previous_cv_image.data){ + std::cerr << "Failed to load image" << std::endl; + return -1; + } + + // Read image 2 with opencv + current_cv_image = cv::imread(vm["image2"].as<std::string>(), + CV_LOAD_IMAGE_COLOR); + if(!current_cv_image.data){ + std::cerr << "Failed to load image" << std::endl; + return -1; + } + } + else //by default use camera + { + //open camera + cap.open(vm["camera"].as<int>()); + // read first frame + cap >> previous_cv_image; + if(!previous_cv_image.data){ + std::cerr << "failed to capture frame" << std::endl; + return -1; + } + + // read second frame + cap >> current_cv_image; + if(!current_cv_image.data){ + std::cerr << "failed to capture frame" << std::endl; + return -1; + } + + } + + // Get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(previous_cv_image, previous_cv_image, CV_BGR2BGRA); + cv::cvtColor(current_cv_image, current_cv_image, CV_BGR2BGRA); + + // Transfer image to gpu + compute::image2d dev_previous_image = + compute::opencv_create_image2d_with_mat( + previous_cv_image, compute::image2d::read_write, queue + ); + // Transfer image to gpu + compute::image2d dev_current_image = + compute::opencv_create_image2d_with_mat( + current_cv_image, compute::image2d::read_write, queue + ); + + // Create output image + compute::image2d dev_output_image( + context, + dev_previous_image.width(), + dev_previous_image.height(), + dev_previous_image.format(), + compute::image2d::write_only + ); + + compute::program optical_program = + compute::program::create_with_source(source, context); + optical_program.build(); + + // create flip kernel and set arguments + compute::kernel optical_kernel(optical_program, "optical_flow"); + float scale = 10; + float offset = 1; + float lambda = 0.0025; + float threshold = 1.0; + + optical_kernel.set_arg(0, dev_previous_image); + optical_kernel.set_arg(1, dev_current_image); + optical_kernel.set_arg(2, dev_output_image); + optical_kernel.set_arg(3, scale); + optical_kernel.set_arg(4, offset); + optical_kernel.set_arg(5, lambda); + optical_kernel.set_arg(6, threshold); + + // run flip kernel + size_t origin[2] = { 0, 0 }; + size_t region[2] = { dev_previous_image.width(), + dev_previous_image.height() }; + queue.enqueue_nd_range_kernel(optical_kernel, 2, origin, region, 0); + + //check for image paths + if(vm.count("image1") && vm.count("image2")) + { + // show host image + cv::imshow("Previous Frame", previous_cv_image); + cv::imshow("Current Frame", current_cv_image); + + // show gpu image + compute::opencv_imshow("filtered image", dev_output_image, queue); + + // wait and return + cv::waitKey(0); + } + else + { + char key = '\0'; + while(key != 27) //check for escape key + { + cap >> current_cv_image; + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(current_cv_image, current_cv_image, CV_BGR2BGRA); + + // Update the device image memory with current frame data + compute::opencv_copy_mat_to_image(previous_cv_image, + dev_previous_image, + queue); + compute::opencv_copy_mat_to_image(current_cv_image, + dev_current_image, + queue); + + // Run the kernel on the device + queue.enqueue_nd_range_kernel(optical_kernel, 2, origin, region, 0); + + // Show host image + cv::imshow("Previous Frame", previous_cv_image); + cv::imshow("Current Frame", current_cv_image); + + // Show GPU image + compute::opencv_imshow("filtered image", dev_output_image, queue); + + // Copy current frame container to previous frame container + current_cv_image.copyTo(previous_cv_image); + + // wait + key = cv::waitKey(10); + } + + } + return 0; +} + diff --git a/src/boost/libs/compute/example/opencv_sobel_filter.cpp b/src/boost/libs/compute/example/opencv_sobel_filter.cpp new file mode 100644 index 00000000..4fcfa206 --- /dev/null +++ b/src/boost/libs/compute/example/opencv_sobel_filter.cpp @@ -0,0 +1,254 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Mageswaran.D <mageswaran1989@gmail.com> +// +// Book Refered: OpenCL Programming Guide +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//---------------------------------------------------------------------------// +// About Sobel Filter: +// * Edge Filter - distinguishes the differrent color region +// * Finds the gradient in x and y-axes +// * Three step process +// -> Find x-axis gradient with kernel/matrix +// Gx = [-1 0 +1] +// [-2 0 +2] +// [-1 0 +1] +// -> Find y-axis gradient with kernel/matrix +// Gy = [-1 -2 -1] +// [ 0 0 0] +// [+1 +2 +1] +// * Gradient magnitude G = sqrt(Gx^2 + Gy^2) +//---------------------------------------------------------------------------// + +#include <iostream> +#include <string> + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/compute/utility/source.hpp> + +#include <boost/program_options.hpp> + +namespace compute = boost::compute; +namespace po = boost::program_options; + +// Create sobel filter program +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE ( + //For out of boundary pixels, edge pixel + // value is returned + const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST; + kernel void sobel_rgb(read_only image2d_t src, write_only image2d_t dst) + { + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + + if (x >= get_image_width(src) || y >= get_image_height(src)) + return; + + // [(x-1, y+1), (x, y+1), (x+1, y+1)] + // [(x-1, y ), (x, y ), (x+1, y )] + // [(x-1, y-1), (x, y-1), (x+1, y-1)] + + // [p02, p12, p22] + // [p01, pixel, p21] + // [p00, p10, p20] + + //Basically finding influence of neighbour pixels on current pixel + float4 p00 = read_imagef(src, sampler, (int2)(x - 1, y - 1)); + float4 p10 = read_imagef(src, sampler, (int2)(x, y - 1)); + float4 p20 = read_imagef(src, sampler, (int2)(x + 1, y - 1)); + + float4 p01 = read_imagef(src, sampler, (int2)(x - 1, y)); + //pixel that we are working on + float4 p21 = read_imagef(src, sampler, (int2)(x + 1, y)); + + float4 p02 = read_imagef(src, sampler, (int2)(x - 1, y + 1)); + float4 p12 = read_imagef(src, sampler, (int2)(x, y + 1)); + float4 p22 = read_imagef(src, sampler, (int2)(x + 1, y + 1)); + + //Find Gx = kernel + 3x3 around current pixel + // Gx = [-1 0 +1] [p02, p12, p22] + // [-2 0 +2] + [p01, pixel, p21] + // [-1 0 +1] [p00, p10, p20] + float3 gx = -p00.xyz + p20.xyz + + 2.0f * (p21.xyz - p01.xyz) + -p02.xyz + p22.xyz; + + //Find Gy = kernel + 3x3 around current pixel + // Gy = [-1 -2 -1] [p02, p12, p22] + // [ 0 0 0] + [p01, pixel, p21] + // [+1 +2 +1] [p00, p10, p20] + float3 gy = p00.xyz + p20.xyz + + 2.0f * (- p12.xyz + p10.xyz) - + p02.xyz - p22.xyz; + //Find G + float3 g = native_sqrt(gx * gx + gy * gy); + + // we could also approximate this as g = fabs(gx) + fabs(gy) + write_imagef(dst, (int2)(x, y), (float4)(g.x, g.y, g.z, 1.0f)); + } +); + +// This example shows how to apply sobel filter on images or on camera frames +// with OpenCV, transfer the frames to the GPU, and apply a sobel filter +// written in OpenCL +int main(int argc, char *argv[]) +{ + /////////////////////////////////////////////////////////////////////////// + + // setup the command line arguments + po::options_description desc; + desc.add_options() + ("help", "show available options") + ("camera", po::value<int>()->default_value(-1), + "if not default camera, specify a camera id") + ("image", po::value<std::string>(), "path to image file"); + + // Parse the command lines + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + //check the command line arguments + if(vm.count("help")) + { + std::cout << desc << std::endl; + return 0; + } + + /////////////////////////////////////////////////////////////////////////// + + //OpenCV variables + cv::Mat cv_mat; + cv::VideoCapture cap; //OpenCV camera handle. + + //OpenCL variables + // Get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + compute::program filter_program = + compute::program::create_with_source(source, context); + + try + { + filter_program.build(); + } + catch(compute::opencl_error e) + { + std::cout<<"Build Error: "<<std::endl + <<filter_program.build_log(); + } + + // create fliter kernel and set arguments + compute::kernel filter_kernel(filter_program, "sobel_rgb"); + + /////////////////////////////////////////////////////////////////////////// + + //check for image paths + if(vm.count("image")) + { + // Read image with OpenCV + cv_mat = cv::imread(vm["image"].as<std::string>(), + CV_LOAD_IMAGE_COLOR); + if(!cv_mat.data){ + std::cerr << "Failed to load image" << std::endl; + return -1; + } + } + else //by default use camera + { + //open camera + cap.open(vm["camera"].as<int>()); + // read first frame + cap >> cv_mat; + if(!cv_mat.data){ + std::cerr << "failed to capture frame" << std::endl; + return -1; + } + } + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); + + // Transfer image/frame data to gpu + compute::image2d dev_input_image = + compute::opencv_create_image2d_with_mat( + cv_mat, compute::image2d::read_write, queue + ); + + // Create output image + // Be sure what will be your ouput image/frame size + compute::image2d dev_output_image( + context, + dev_input_image.width(), + dev_input_image.height(), + dev_input_image.format(), + compute::image2d::write_only + ); + + filter_kernel.set_arg(0, dev_input_image); + filter_kernel.set_arg(1, dev_output_image); + + + // run flip kernel + size_t origin[2] = { 0, 0 }; + size_t region[2] = { dev_input_image.width(), + dev_input_image.height() }; + + /////////////////////////////////////////////////////////////////////////// + + queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); + + //check for image paths + if(vm.count("image")) + { + // show host image + cv::imshow("Original Image", cv_mat); + + // show gpu image + compute::opencv_imshow("Filtered Image", dev_output_image, queue); + + // wait and return + cv::waitKey(0); + } + else + { + char key = '\0'; + while(key != 27) //check for escape key + { + cap >> cv_mat; + + // Convert image to BGRA (OpenCL requires 16-byte aligned data) + cv::cvtColor(cv_mat, cv_mat, CV_BGR2BGRA); + + // Update the device image memory with current frame data + compute::opencv_copy_mat_to_image(cv_mat, + dev_input_image,queue); + + // Run the kernel on the device + queue.enqueue_nd_range_kernel(filter_kernel, 2, origin, region, 0); + + // Show host image + cv::imshow("Camera Frame", cv_mat); + + // Show GPU image + compute::opencv_imshow("Filtered RGB Frame", dev_output_image, queue); + + // wait + key = cv::waitKey(10); + } + } + return 0; +} diff --git a/src/boost/libs/compute/example/opengl_sphere.cpp b/src/boost/libs/compute/example/opengl_sphere.cpp new file mode 100644 index 00000000..38999f34 --- /dev/null +++ b/src/boost/libs/compute/example/opengl_sphere.cpp @@ -0,0 +1,242 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <algorithm> + +#include <GL/gl.h> + +#include <vtkActor.h> +#include <vtkCamera.h> +#include <vtkgl.h> +#include <vtkInteractorStyleSwitch.h> +#include <vtkMapper.h> +#include <vtkObjectFactory.h> +#include <vtkOpenGLExtensionManager.h> +#include <vtkOpenGLRenderWindow.h> +#include <vtkProperty.h> +#include <vtkRenderer.h> +#include <vtkRenderWindow.h> +#include <vtkRenderWindowInteractor.h> +#include <vtkSmartPointer.h> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/iota.hpp> +#include <boost/compute/interop/opengl.hpp> +#include <boost/compute/interop/vtk.hpp> +#include <boost/compute/utility/dim.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +// tesselates a sphere with radius, phi_slices, and theta_slices. returns +// a shared opencl/opengl buffer containing the vertex data. +compute::opengl_buffer tesselate_sphere(float radius, + size_t phi_slices, + size_t theta_slices, + compute::command_queue &queue) +{ + using compute::dim; + + const compute::context &context = queue.get_context(); + + const size_t vertex_count = phi_slices * theta_slices; + + // create opengl buffer + GLuint vbo; + vtkgl::GenBuffersARB(1, &vbo); + vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, vbo); + vtkgl::BufferDataARB(vtkgl::ARRAY_BUFFER, + sizeof(float) * 4 * vertex_count, + NULL, + vtkgl::STREAM_DRAW); + vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, 0); + + // create shared opengl/opencl buffer + compute::opengl_buffer vertex_buffer(context, vbo); + + // tesselate_sphere kernel source + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void tesselate_sphere(float radius, + uint phi_slices, + uint theta_slices, + __global float4 *vertex_buffer) + { + const uint phi_i = get_global_id(0); + const uint theta_i = get_global_id(1); + + const float phi = phi_i * 2.f * M_PI_F / phi_slices; + const float theta = theta_i * 2.f * M_PI_F / theta_slices; + + float4 v; + v.x = radius * cos(theta) * cos(phi); + v.y = radius * cos(theta) * sin(phi); + v.z = radius * sin(theta); + v.w = 1.f; + + vertex_buffer[phi_i*phi_slices+theta_i] = v; + } + ); + + // build tesselate_sphere program + compute::program program = + compute::program::create_with_source(source, context); + program.build(); + + // setup tesselate_sphere kernel + compute::kernel kernel(program, "tesselate_sphere"); + kernel.set_arg<compute::float_>(0, radius); + kernel.set_arg<compute::uint_>(1, phi_slices); + kernel.set_arg<compute::uint_>(2, theta_slices); + kernel.set_arg(3, vertex_buffer); + + // acqurire buffer so that it is accessible to OpenCL + compute::opengl_enqueue_acquire_buffer(vertex_buffer, queue); + + // execute tesselate_sphere kernel + queue.enqueue_nd_range_kernel( + kernel, dim(0, 0), dim(phi_slices, theta_slices), dim(1, 1) + ); + + // release buffer so that it is accessible to OpenGL + compute::opengl_enqueue_release_buffer(vertex_buffer, queue); + + return vertex_buffer; +} + +// simple vtkMapper subclass to render the tesselated sphere on the gpu. +class gpu_sphere_mapper : public vtkMapper +{ +public: + vtkTypeMacro(gpu_sphere_mapper, vtkMapper) + + static gpu_sphere_mapper* New() + { + return new gpu_sphere_mapper; + } + + void Render(vtkRenderer *renderer, vtkActor *actor) + { + if(!m_initialized){ + Initialize(renderer, actor); + m_initialized = true; + } + + if(!m_tesselated){ + m_vertex_count = m_phi_slices * m_theta_slices; + + // tesselate sphere + m_vertex_buffer = tesselate_sphere( + m_radius, m_phi_slices, m_theta_slices, m_command_queue + ); + + // ensure tesselation is finished (seems to be required on AMD) + m_command_queue.finish(); + + // set tesselated flag to true + m_tesselated = true; + } + + // draw sphere + glEnableClientState(GL_VERTEX_ARRAY); + vtkgl::BindBufferARB(vtkgl::ARRAY_BUFFER, m_vertex_buffer.get_opengl_object()); + glVertexPointer(4, GL_FLOAT, sizeof(float)*4, 0); + glDrawArrays(GL_POINTS, 0, m_vertex_count); + } + + void Initialize(vtkRenderer *renderer, vtkActor *actor) + { + // initialize opengl extensions + vtkOpenGLExtensionManager *extensions = + static_cast<vtkOpenGLRenderWindow *>(renderer->GetRenderWindow()) + ->GetExtensionManager(); + extensions->LoadExtension("GL_ARB_vertex_buffer_object"); + + // initialize opencl/opengl shared context + m_context = compute::opengl_create_shared_context(); + compute::device device = m_context.get_device(); + std::cout << "device: " << device.name() << std::endl; + + // create command queue for the gpu device + m_command_queue = compute::command_queue(m_context, device); + } + + double* GetBounds() + { + static double bounds[6]; + bounds[0] = -m_radius; bounds[1] = m_radius; + bounds[2] = -m_radius; bounds[3] = m_radius; + bounds[4] = -m_radius; bounds[5] = m_radius; + return bounds; + } + +protected: + gpu_sphere_mapper() + { + m_radius = 5.0f; + m_phi_slices = 100; + m_theta_slices = 100; + m_initialized = false; + m_tesselated = false; + } + +private: + float m_radius; + int m_phi_slices; + int m_theta_slices; + int m_vertex_count; + bool m_initialized; + bool m_tesselated; + compute::context m_context; + compute::command_queue m_command_queue; + compute::opengl_buffer m_vertex_buffer; +}; + +int main(int argc, char *argv[]) +{ + // create gpu sphere mapper + vtkSmartPointer<gpu_sphere_mapper> mapper = + vtkSmartPointer<gpu_sphere_mapper>::New(); + + // create actor for gpu sphere mapper + vtkSmartPointer<vtkActor> actor = + vtkSmartPointer<vtkActor>::New(); + actor->GetProperty()->LightingOff(); + actor->GetProperty()->SetInterpolationToFlat(); + actor->SetMapper(mapper); + + // create render window + vtkSmartPointer<vtkRenderer> renderer = + vtkSmartPointer<vtkRenderer>::New(); + renderer->SetBackground(.1, .2, .31); + vtkSmartPointer<vtkRenderWindow> renderWindow = + vtkSmartPointer<vtkRenderWindow>::New(); + renderWindow->SetSize(800, 600); + renderWindow->AddRenderer(renderer); + vtkSmartPointer<vtkRenderWindowInteractor> renderWindowInteractor = + vtkSmartPointer<vtkRenderWindowInteractor>::New(); + vtkInteractorStyleSwitch *interactorStyle = + vtkInteractorStyleSwitch::SafeDownCast( + renderWindowInteractor->GetInteractorStyle() + ); + interactorStyle->SetCurrentStyleToTrackballCamera(); + renderWindowInteractor->SetRenderWindow(renderWindow); + renderer->AddActor(actor); + + // render + renderer->ResetCamera(); + vtkCamera *camera = renderer->GetActiveCamera(); + camera->Elevation(-90.0); + renderWindowInteractor->Initialize(); + renderWindow->Render(); + renderWindowInteractor->Start(); + + return 0; +} diff --git a/src/boost/libs/compute/example/point_centroid.cpp b/src/boost/libs/compute/example/point_centroid.cpp new file mode 100644 index 00000000..e5691f7c --- /dev/null +++ b/src/boost/libs/compute/example/point_centroid.cpp @@ -0,0 +1,68 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//[point_centroid_example + +#include <iostream> + +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/accumulate.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace compute = boost::compute; + +// the point centroid example calculates and displays the +// centroid of a set of 3D points stored as float4's +int main() +{ + using compute::float4_; + + // get default device and setup context + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + // point coordinates + float points[] = { 1.0f, 2.0f, 3.0f, 0.0f, + -2.0f, -3.0f, 4.0f, 0.0f, + 1.0f, -2.0f, 2.5f, 0.0f, + -7.0f, -3.0f, -2.0f, 0.0f, + 3.0f, 4.0f, -5.0f, 0.0f }; + + // create vector for five points + compute::vector<float4_> vector(5, context); + + // copy point data to the device + compute::copy( + reinterpret_cast<float4_ *>(points), + reinterpret_cast<float4_ *>(points) + 5, + vector.begin(), + queue + ); + + // calculate sum + float4_ sum = compute::accumulate( + vector.begin(), vector.end(), float4_(0, 0, 0, 0), queue + ); + + // calculate centroid + float4_ centroid; + for(size_t i = 0; i < 3; i++){ + centroid[i] = sum[i] / 5.0f; + } + + // print centroid + std::cout << "centroid: " << centroid << std::endl; + + return 0; +} + +//] diff --git a/src/boost/libs/compute/example/price_cross.cpp b/src/boost/libs/compute/example/price_cross.cpp new file mode 100644 index 00000000..95196494 --- /dev/null +++ b/src/boost/libs/compute/example/price_cross.cpp @@ -0,0 +1,87 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/iterator/zip_iterator.hpp> + +namespace compute = boost::compute; + +// this example shows how to use the find_if() algorithm to detect the +// point at which two vectors of prices (such as stock prices) cross. +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + // prices #1 (from 10.0 to 11.0) + std::vector<float> prices1; + for(float i = 10.0; i <= 11.0; i += 0.1){ + prices1.push_back(i); + } + + // prices #2 (from 11.0 to 10.0) + std::vector<float> prices2; + for(float i = 11.0; i >= 10.0; i -= 0.1){ + prices2.push_back(i); + } + + // create gpu vectors + compute::vector<float> gpu_prices1(prices1.size(), context); + compute::vector<float> gpu_prices2(prices2.size(), context); + + // copy prices to gpu + compute::copy(prices1.begin(), prices1.end(), gpu_prices1.begin(), queue); + compute::copy(prices2.begin(), prices2.end(), gpu_prices2.begin(), queue); + + // function returning true if the second price is less than the first price + BOOST_COMPUTE_FUNCTION(bool, check_price_cross, (boost::tuple<float, float> prices), + { + // first price + const float first = boost_tuple_get(prices, 0); + + // second price + const float second = boost_tuple_get(prices, 1); + + // return true if second price is less than first + return second < first; + }); + + // find cross point (should be 10.5) + compute::vector<float>::iterator iter = boost::get<0>( + compute::find_if( + compute::make_zip_iterator( + boost::make_tuple(gpu_prices1.begin(), gpu_prices2.begin()) + ), + compute::make_zip_iterator( + boost::make_tuple(gpu_prices1.end(), gpu_prices2.end()) + ), + check_price_cross, + queue + ).get_iterator_tuple() + ); + + // print out result + int index = std::distance(gpu_prices1.begin(), iter); + std::cout << "price cross at index: " << index << std::endl; + + float value; + compute::copy_n(iter, 1, &value, queue); + std::cout << "value: " << value << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/print_vector.cpp b/src/boost/libs/compute/example/print_vector.cpp new file mode 100644 index 00000000..9ec5c6c7 --- /dev/null +++ b/src/boost/libs/compute/example/print_vector.cpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <vector> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/iota.hpp> +#include <boost/compute/container/vector.hpp> + +namespace compute = boost::compute; + +// this example demonstrates how to print the values in a vector +int main() +{ + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + // create vector on the device and fill with the sequence 1..10 + compute::vector<int> vector(10, context); + compute::iota(vector.begin(), vector.end(), 1, queue); + +//[print_vector_example + std::cout << "vector: [ "; + boost::compute::copy( + vector.begin(), vector.end(), + std::ostream_iterator<int>(std::cout, ", "), + queue + ); + std::cout << "]" << std::endl; +//] + + return 0; +} diff --git a/src/boost/libs/compute/example/qimage_blur.cpp b/src/boost/libs/compute/example/qimage_blur.cpp new file mode 100644 index 00000000..cbfa3145 --- /dev/null +++ b/src/boost/libs/compute/example/qimage_blur.cpp @@ -0,0 +1,145 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <algorithm> + +#include <QtGlobal> +#if QT_VERSION >= 0x050000 +#include <QtWidgets> +#else +#include <QtGui> +#endif + +#ifndef Q_MOC_RUN +#include <boost/compute/system.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/interop/qt.hpp> +#include <boost/compute/utility/dim.hpp> +#include <boost/compute/utility/source.hpp> +#endif // Q_MOC_RUN + +namespace compute = boost::compute; + +inline void box_filter_image(const compute::image2d &input, + compute::image2d &output, + compute::uint_ box_height, + compute::uint_ box_width, + compute::command_queue &queue) +{ + using compute::dim; + + const compute::context &context = queue.get_context(); + + // simple box filter kernel source + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void box_filter(__read_only image2d_t input, + __write_only image2d_t output, + uint box_height, + uint box_width) + { + int x = get_global_id(0); + int y = get_global_id(1); + int h = get_image_height(input); + int w = get_image_width(input); + int k = box_width; + int l = box_height; + + if(x < k/2 || y < l/2 || x >= w-(k/2) || y >= h-(l/2)){ + write_imagef(output, (int2)(x, y), (float4)(0, 0, 0, 1)); + } + else { + const sampler_t sampler = CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + + float4 sum = { 0, 0, 0, 0 }; + for(int i = 0; i < k; i++){ + for(int j = 0; j < l; j++){ + sum += read_imagef(input, sampler, (int2)(x+i-k, y+j-l)); + } + } + sum /= (float) k * l; + float4 value = (float4)( sum.x, sum.y, sum.z, 1.f ); + write_imagef(output, (int2)(x, y), value); + } + } + ); + + // build box filter program + compute::program program = + compute::program::create_with_source(source, context); + program.build(); + + // setup box filter kernel + compute::kernel kernel(program, "box_filter"); + kernel.set_arg(0, input); + kernel.set_arg(1, output); + kernel.set_arg(2, box_height); + kernel.set_arg(3, box_width); + + // execute the box filter kernel + queue.enqueue_nd_range_kernel(kernel, dim(0, 0), input.size(), dim(1, 1)); +} + +// this example shows how to load an image using Qt, apply a simple +// box blur filter, and then display it in a Qt window. +int main(int argc, char *argv[]) +{ + QApplication app(argc, argv); + + // check command line + if(argc < 2){ + std::cout << "usage: qimage_blur [FILENAME]" << std::endl; + return -1; + } + + // load image using Qt + QString fileName = argv[1]; + QImage qimage(fileName); + + size_t height = qimage.height(); + size_t width = qimage.width(); + size_t bytes_per_line = qimage.bytesPerLine(); + + qDebug() << "height:" << height + << "width:" << width + << "bytes per line:" << bytes_per_line + << "depth:" << qimage.depth() + << "format:" << qimage.format(); + + // create compute context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + std::cout << "device: " << gpu.name() << std::endl; + + // get the opencl image format for the qimage + compute::image_format format = + compute::qt_qimage_format_to_image_format(qimage.format()); + + // create input and output images on the gpu + compute::image2d input_image(context, width, height, format); + compute::image2d output_image(context, width, height, format); + + // copy host qimage to gpu image + compute::qt_copy_qimage_to_image2d(qimage, input_image, queue); + + // apply box filter + box_filter_image(input_image, output_image, 7, 7, queue); + + // copy gpu blurred image from to host qimage + compute::qt_copy_image2d_to_qimage(output_image, qimage, queue); + + // show image as a pixmap + QLabel label; + label.setPixmap(QPixmap::fromImage(qimage)); + label.show(); + + return app.exec(); +} diff --git a/src/boost/libs/compute/example/random_walk.cpp b/src/boost/libs/compute/example/random_walk.cpp new file mode 100644 index 00000000..13106d96 --- /dev/null +++ b/src/boost/libs/compute/example/random_walk.cpp @@ -0,0 +1,153 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <opencv2/core/core.hpp> +#include <opencv2/highgui/highgui.hpp> +#include <opencv2/imgproc/imgproc.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/inclusive_scan.hpp> +#include <boost/compute/algorithm/inclusive_scan.hpp> +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> +#include <boost/compute/random/default_random_engine.hpp> +#include <boost/compute/random/uniform_real_distribution.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +// this example uses the random-number generation functions in Boost.Compute +// to calculate a large number of random "steps" and then plots the final +// random "walk" in a 2D image on the GPU and displays it with OpenCV +int main() +{ + // number of random steps to take + size_t steps = 250000; + + // height and width of image + size_t height = 800; + size_t width = 800; + + // get default device and setup context + compute::device gpu = compute::system::default_device(); + compute::context context(gpu); + compute::command_queue queue(context, gpu); + + using compute::int2_; + + // calaculate random values for each step + compute::vector<float> random_values(steps, context); + compute::default_random_engine random_engine(queue); + compute::uniform_real_distribution<float> random_distribution(0.f, 4.f); + + random_distribution.generate( + random_values.begin(), random_values.end(), random_engine, queue + ); + + // calaculate coordinates for each step + compute::vector<int2_> coordinates(steps, context); + + // function to convert random values to random directions (in 2D) + BOOST_COMPUTE_FUNCTION(int2_, take_step, (const float x), + { + if(x < 1.f){ + // move right + return (int2)(1, 0); + } + if(x < 2.f){ + // move up + return (int2)(0, 1); + } + if(x < 3.f){ + // move left + return (int2)(-1, 0); + } + else { + // move down + return (int2)(0, -1); + } + }); + + // transform the random values into random steps + compute::transform( + random_values.begin(), random_values.end(), coordinates.begin(), take_step, queue + ); + + // set staring position + int2_ starting_position(width / 2, height / 2); + compute::copy_n(&starting_position, 1, coordinates.begin(), queue); + + // scan steps to calculate position after each step + compute::inclusive_scan( + coordinates.begin(), coordinates.end(), coordinates.begin(), queue + ); + + // create output image + compute::image2d image( + context, width, height, compute::image_format(CL_RGBA, CL_UNSIGNED_INT8) + ); + + // program with two kernels, one to fill the image with white, and then + // one the draw to points calculated in coordinates on the image + const char draw_walk_source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void draw_walk(__global const int2 *coordinates, + __write_only image2d_t image) + { + const uint i = get_global_id(0); + const int2 coord = coordinates[i]; + + if(coord.x > 0 && coord.x < get_image_width(image) && + coord.y > 0 && coord.y < get_image_height(image)){ + uint4 black = { 0, 0, 0, 0 }; + write_imageui(image, coord, black); + } + } + + __kernel void fill_white(__write_only image2d_t image) + { + const int2 coord = { get_global_id(0), get_global_id(1) }; + + if(coord.x < get_image_width(image) && + coord.y < get_image_height(image)){ + uint4 white = { 255, 255, 255, 255 }; + write_imageui(image, coord, white); + } + } + ); + + // build the program + compute::program draw_program = + compute::program::build_with_source(draw_walk_source, context); + + // fill image with white + compute::kernel fill_kernel(draw_program, "fill_white"); + fill_kernel.set_arg(0, image); + + const size_t offset[] = { 0, 0 }; + const size_t bounds[] = { width, height }; + + queue.enqueue_nd_range_kernel(fill_kernel, 2, offset, bounds, 0); + + // draw random walk + compute::kernel draw_kernel(draw_program, "draw_walk"); + draw_kernel.set_arg(0, coordinates); + draw_kernel.set_arg(1, image); + queue.enqueue_1d_range_kernel(draw_kernel, 0, coordinates.size(), 0); + + // show image + compute::opencv_imshow("random walk", image, queue); + + // wait and return + cv::waitKey(0); + + return 0; +} diff --git a/src/boost/libs/compute/example/resize_image.cpp b/src/boost/libs/compute/example/resize_image.cpp new file mode 100644 index 00000000..d9e29edd --- /dev/null +++ b/src/boost/libs/compute/example/resize_image.cpp @@ -0,0 +1,253 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <algorithm> + +#include <QtGlobal> +#if QT_VERSION >= 0x050000 +#include <QtWidgets> +#else +#include <QtGui> +#endif +#include <QtOpenGL> + +#include <boost/program_options.hpp> + +#ifndef Q_MOC_RUN +#include <boost/compute/command_queue.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/image/image_sampler.hpp> +#include <boost/compute/interop/qt.hpp> +#include <boost/compute/interop/opengl.hpp> +#include <boost/compute/utility/source.hpp> +#endif // Q_MOC_RUN + +namespace compute = boost::compute; +namespace po = boost::program_options; + +// opencl source code +const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void resize_image(__read_only image2d_t input, + const sampler_t sampler, + __write_only image2d_t output) + { + const uint x = get_global_id(0); + const uint y = get_global_id(1); + + const float w = get_image_width(output); + const float h = get_image_height(output); + + float2 coord = { ((float) x / w) * get_image_width(input), + ((float) y / h) * get_image_height(input) }; + + float4 pixel = read_imagef(input, sampler, coord); + write_imagef(output, (int2)(x, h - y - 1), pixel); + }; +); + +class ImageWidget : public QGLWidget +{ + Q_OBJECT + +public: + ImageWidget(QString fileName, QWidget *parent = 0); + ~ImageWidget(); + + void initializeGL(); + void resizeGL(int width, int height); + void paintGL(); + +private: + QImage qt_image_; + compute::context context_; + compute::command_queue queue_; + compute::program program_; + compute::image2d image_; + compute::image_sampler sampler_; + GLuint gl_texture_; + compute::opengl_texture cl_texture_; +}; + +ImageWidget::ImageWidget(QString fileName, QWidget *parent) + : QGLWidget(parent), + qt_image_(fileName) +{ + gl_texture_ = 0; +} + +ImageWidget::~ImageWidget() +{ +} + +void ImageWidget::initializeGL() +{ + // setup opengl + glDisable(GL_LIGHTING); + + // create the OpenGL/OpenCL shared context + context_ = compute::opengl_create_shared_context(); + + // get gpu device + compute::device gpu = context_.get_device(); + std::cout << "device: " << gpu.name() << std::endl; + + // setup command queue + queue_ = compute::command_queue(context_, gpu); + + // allocate image on the device + compute::image_format format = + compute::qt_qimage_format_to_image_format(qt_image_.format()); + + image_ = compute::image2d( + context_, qt_image_.width(), qt_image_.height(), format, CL_MEM_READ_ONLY + ); + + // transfer image to the device + compute::qt_copy_qimage_to_image2d(qt_image_, image_, queue_); + + // setup image sampler (use CL_FILTER_NEAREST to disable linear interpolation) + sampler_ = compute::image_sampler( + context_, false, CL_ADDRESS_NONE, CL_FILTER_LINEAR + ); + + // build resize program + program_ = compute::program::build_with_source(source, context_); +} + +void ImageWidget::resizeGL(int width, int height) +{ +#if QT_VERSION >= 0x050000 + // scale height/width based on device pixel ratio + width /= windowHandle()->devicePixelRatio(); + height /= windowHandle()->devicePixelRatio(); +#endif + + // resize viewport + glViewport(0, 0, width, height); + + // delete old texture + if(gl_texture_){ + glDeleteTextures(1, &gl_texture_); + gl_texture_ = 0; + } + + // generate new texture + glGenTextures(1, &gl_texture_); + glBindTexture(GL_TEXTURE_2D, gl_texture_); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexImage2D( + GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0 + ); + + // create opencl object for the texture + cl_texture_ = compute::opengl_texture( + context_, GL_TEXTURE_2D, 0, gl_texture_, CL_MEM_WRITE_ONLY + ); +} + +void ImageWidget::paintGL() +{ + float w = width(); + float h = height(); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0.0, w, 0.0, h, -1.0, 1.0); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + + // setup the resize kernel + compute::kernel kernel(program_, "resize_image"); + kernel.set_arg(0, image_); + kernel.set_arg(1, sampler_); + kernel.set_arg(2, cl_texture_); + + // acquire the opengl texture so it can be used in opencl + compute::opengl_enqueue_acquire_gl_objects(1, &cl_texture_.get(), queue_); + + // execute the resize kernel + const size_t global_work_offset[] = { 0, 0 }; + const size_t global_work_size[] = { size_t(width()), size_t(height()) }; + + queue_.enqueue_nd_range_kernel( + kernel, 2, global_work_offset, global_work_size, 0 + ); + + // release the opengl texture so it can be used by opengl + compute::opengl_enqueue_release_gl_objects(1, &cl_texture_.get(), queue_); + + // ensure opencl is finished before rendering in opengl + queue_.finish(); + + // draw a single quad with the resized image texture + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, gl_texture_); + + glBegin(GL_QUADS); + glTexCoord2f(0, 0); glVertex2f(0, 0); + glTexCoord2f(0, 1); glVertex2f(0, h); + glTexCoord2f(1, 1); glVertex2f(w, h); + glTexCoord2f(1, 0); glVertex2f(w, 0); + glEnd(); +} + +// the resize image example demonstrates how to interactively resize a +// 2D image and display it using OpenGL. a image sampler is used to perform +// hardware-accelerated linear interpolation for the resized image. +int main(int argc, char *argv[]) +{ + // setup command line arguments + po::options_description options("options"); + options.add_options() + ("help", "show usage instructions") + ("file", po::value<std::string>(), "image file name (e.g. /path/to/image.png)") + ; + po::positional_options_description positional_options; + positional_options.add("file", 1); + + // parse command line + po::variables_map vm; + po::store( + po::command_line_parser(argc, argv) + .options(options) + .positional(positional_options) + .run(), + vm + ); + po::notify(vm); + + // check for file argument + if(vm.count("help") || !vm.count("file")){ + std::cout << options << std::endl; + return -1; + } + + // get file name + std::string file_name = vm["file"].as<std::string>(); + + // setup qt application + QApplication app(argc, argv); + + // setup image widget + ImageWidget widget(QString::fromStdString(file_name)); + widget.show(); + + // run qt application + return app.exec(); +} + +#include "resize_image.moc" diff --git a/src/boost/libs/compute/example/simple_kernel.cpp b/src/boost/libs/compute/example/simple_kernel.cpp new file mode 100644 index 00000000..4aa1872f --- /dev/null +++ b/src/boost/libs/compute/example/simple_kernel.cpp @@ -0,0 +1,84 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/core.hpp> + +namespace compute = boost::compute; + +// this example demonstrates how to use the Boost.Compute classes to +// setup and run a simple vector addition kernel on the GPU +int main() +{ + // get the default device + compute::device device = compute::system::default_device(); + + // create a context for the device + compute::context context(device); + + // setup input arrays + float a[] = { 1, 2, 3, 4 }; + float b[] = { 5, 6, 7, 8 }; + + // make space for the output + float c[] = { 0, 0, 0, 0 }; + + // create memory buffers for the input and output + compute::buffer buffer_a(context, 4 * sizeof(float)); + compute::buffer buffer_b(context, 4 * sizeof(float)); + compute::buffer buffer_c(context, 4 * sizeof(float)); + + // source code for the add kernel + const char source[] = + "__kernel void add(__global const float *a," + " __global const float *b," + " __global float *c)" + "{" + " const uint i = get_global_id(0);" + " c[i] = a[i] + b[i];" + "}"; + + // create the program with the source + compute::program program = + compute::program::create_with_source(source, context); + + // compile the program + program.build(); + + // create the kernel + compute::kernel kernel(program, "add"); + + // set the kernel arguments + kernel.set_arg(0, buffer_a); + kernel.set_arg(1, buffer_b); + kernel.set_arg(2, buffer_c); + + // create a command queue + compute::command_queue queue(context, device); + + // write the data from 'a' and 'b' to the device + queue.enqueue_write_buffer(buffer_a, 0, 4 * sizeof(float), a); + queue.enqueue_write_buffer(buffer_b, 0, 4 * sizeof(float), b); + + // run the add kernel + queue.enqueue_1d_range_kernel(kernel, 0, 4, 0); + + // transfer results back to the host array 'c' + queue.enqueue_read_buffer(buffer_c, 0, 4 * sizeof(float), c); + + // print out results in 'c' + std::cout << "c: [" << c[0] << ", " + << c[1] << ", " + << c[2] << ", " + << c[3] << "]" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/simple_moving_average.cpp b/src/boost/libs/compute/example/simple_moving_average.cpp new file mode 100644 index 00000000..968db393 --- /dev/null +++ b/src/boost/libs/compute/example/simple_moving_average.cpp @@ -0,0 +1,139 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> +#include <cstdlib> + +#include <boost/compute/core.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/inclusive_scan.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/source.hpp> + +namespace compute = boost::compute; + +/// warning precision is not precise due +/// to the float error accumulation when size is large enough +/// for more precision use double +/// or a kahan sum else results can diverge +/// from the CPU implementation +compute::program make_sma_program(const compute::context& context) +{ + const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE( + __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize) + { + const int gid = get_global_id(0); + + float cumValues = 0.f; + int endIdx = gid + wSize/2; + int startIdx = gid -1 - wSize/2; + + if(endIdx > size -1) + endIdx = size -1; + + cumValues += scannedValues[endIdx]; + if(startIdx < 0) + startIdx = -1; + else + cumValues -= scannedValues[startIdx]; + + output[gid] =(float)( cumValues / ( float )(endIdx - startIdx)); + } + ); + + // create sma program + return compute::program::build_with_source(source,context); +} + +bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize) +{ + int size = values.size(); + if(size != (int)smoothValues.size()) return false; + + int semiWidth = wSize/2; + + bool ret = true; + for(int idx = 0 ; idx < size ; ++idx) + { + int start = (std::max)(idx - semiWidth,0); + int end = (std::min)(idx + semiWidth,size-1); + float res = 0; + for(int j = start ; j <= end ; ++j) + { + res+= values[j]; + } + + res /= float(end - start +1); + + if(std::abs(res-smoothValues[idx]) > 1e-3) + { + std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl; + ret = false; + } + } + + return ret; +} + +// generate a uniform law over [0,10] +float myRand() +{ + static const double divisor = double(RAND_MAX)+1.; + return double(rand())/divisor * 10.; +} + +int main() +{ + unsigned int size = 1024; + // wSize must be odd + unsigned int wSize = 21; + // get the default device + compute::device device = compute::system::default_device(); + // create a context for the device + compute::context context(device); + // get the program + compute::program program = make_sma_program(context); + + // create vector of random numbers on the host + std::vector<float> host_vector(size); + std::vector<float> host_result(size); + std::generate(host_vector.begin(), host_vector.end(), myRand); + + compute::vector<float> a(size,context); + compute::vector<float> b(size,context); + compute::vector<float> c(size,context); + compute::command_queue queue(context, device); + + compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue); + + // scan values + compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue); + // sma kernel + compute::kernel kernel(program, "SMA"); + kernel.set_arg(0,b.get_buffer()); + kernel.set_arg(1,(int)b.size()); + kernel.set_arg(2,c.get_buffer()); + kernel.set_arg(3,(int)wSize); + + using compute::uint_; + uint_ tpb = 128; + uint_ workSize = size; + queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb); + + compute::copy(c.begin(),c.end(),host_result.begin(),queue); + + bool res = check_results(host_vector,host_result,wSize); + std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's"; + std::cout << status << std::endl; + + return 0; +} + diff --git a/src/boost/libs/compute/example/sort_vector.cpp b/src/boost/libs/compute/example/sort_vector.cpp new file mode 100644 index 00000000..39ec8ea7 --- /dev/null +++ b/src/boost/libs/compute/example/sort_vector.cpp @@ -0,0 +1,68 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <algorithm> +#include <iostream> +#include <vector> + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/sort.hpp> +#include <boost/compute/container/vector.hpp> + +namespace compute = boost::compute; + +int rand_int() +{ + return rand() % 100; +} + +// this example demonstrates how to sort a vector of ints on the GPU +int main() +{ + // create vector of random values on the host + std::vector<int> host_vector(10); + std::generate(host_vector.begin(), host_vector.end(), rand_int); + + // print out input vector + std::cout << "input: [ "; + for(size_t i = 0; i < host_vector.size(); i++){ + std::cout << host_vector[i]; + + if(i != host_vector.size() - 1){ + std::cout << ", "; + } + } + std::cout << " ]" << std::endl; + + // transfer the values to the device + compute::vector<int> device_vector = host_vector; + + // sort the values on the device + compute::sort(device_vector.begin(), device_vector.end()); + + // transfer the values back to the host + compute::copy(device_vector.begin(), + device_vector.end(), + host_vector.begin()); + + // print out the sorted vector + std::cout << "output: [ "; + for(size_t i = 0; i < host_vector.size(); i++){ + std::cout << host_vector[i]; + + if(i != host_vector.size() - 1){ + std::cout << ", "; + } + } + std::cout << " ]" << std::endl; + + return 0; +} diff --git a/src/boost/libs/compute/example/threefry_engine.cpp b/src/boost/libs/compute/example/threefry_engine.cpp new file mode 100644 index 00000000..5cbf6c8e --- /dev/null +++ b/src/boost/libs/compute/example/threefry_engine.cpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://kylelutz.github.com/compute for more information. +//---------------------------------------------------------------------------// + + +#include <boost/compute/random/threefry_engine.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/system.hpp> +#include <iostream> + +int main() +{ + using boost::compute::uint_; + boost::compute::device device = boost::compute::system::default_device(); + boost::compute::context context(device); + boost::compute::command_queue queue(context, device); + boost::compute::threefry_engine<> rng(queue); + boost::compute::vector<uint_> vector_ctr(20, context); + + uint32_t ctr[20]; + for(int i = 0; i < 10; i++) { + ctr[i*2] = i; + ctr[i*2+1] = 0; + } + boost::compute::copy(ctr, ctr+20, vector_ctr.begin(), queue); + rng.generate(vector_ctr.begin(), vector_ctr.end(), queue); + boost::compute::copy(vector_ctr.begin(), vector_ctr.end(), ctr, queue); + + for(int i = 0; i < 10; i++) { + std::cout << std::hex << ctr[i*2] << " " << ctr[i*2+1] << std::endl; + } + return 0; +} + diff --git a/src/boost/libs/compute/example/time_copy.cpp b/src/boost/libs/compute/example/time_copy.cpp new file mode 100644 index 00000000..35168770 --- /dev/null +++ b/src/boost/libs/compute/example/time_copy.cpp @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//[time_copy_example + +#include <vector> +#include <cstdlib> +#include <iostream> + +#include <boost/compute/event.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/container/vector.hpp> + +namespace compute = boost::compute; + +int main() +{ + // get the default device + compute::device gpu = compute::system::default_device(); + + // create context for default device + compute::context context(gpu); + + // create command queue with profiling enabled + compute::command_queue queue( + context, gpu, compute::command_queue::enable_profiling + ); + + // generate random data on the host + std::vector<int> host_vector(16000000); + std::generate(host_vector.begin(), host_vector.end(), rand); + + // create a vector on the device + compute::vector<int> device_vector(host_vector.size(), context); + + // copy data from the host to the device + compute::future<void> future = compute::copy_async( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + // wait for copy to finish + future.wait(); + + // get elapsed time from event profiling information + boost::chrono::milliseconds duration = + future.get_event().duration<boost::chrono::milliseconds>(); + + // print elapsed time in milliseconds + std::cout << "time: " << duration.count() << " ms" << std::endl; + + return 0; +} + +//] diff --git a/src/boost/libs/compute/example/transform_sqrt.cpp b/src/boost/libs/compute/example/transform_sqrt.cpp new file mode 100644 index 00000000..860063fc --- /dev/null +++ b/src/boost/libs/compute/example/transform_sqrt.cpp @@ -0,0 +1,58 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +//[transform_sqrt_example + +#include <vector> +#include <algorithm> + +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/math.hpp> + +namespace compute = boost::compute; + +int main() +{ + // get default device and setup context + compute::device device = compute::system::default_device(); + compute::context context(device); + compute::command_queue queue(context, device); + + // generate random data on the host + std::vector<float> host_vector(10000); + std::generate(host_vector.begin(), host_vector.end(), rand); + + // create a vector on the device + compute::vector<float> device_vector(host_vector.size(), context); + + // transfer data from the host to the device + compute::copy( + host_vector.begin(), host_vector.end(), device_vector.begin(), queue + ); + + // calculate the square-root of each element in-place + compute::transform( + device_vector.begin(), + device_vector.end(), + device_vector.begin(), + compute::sqrt<float>(), + queue + ); + + // copy values back to the host + compute::copy( + device_vector.begin(), device_vector.end(), host_vector.begin(), queue + ); + + return 0; +} + +//] diff --git a/src/boost/libs/compute/example/vector_addition.cpp b/src/boost/libs/compute/example/vector_addition.cpp new file mode 100644 index 00000000..1f3aa92d --- /dev/null +++ b/src/boost/libs/compute/example/vector_addition.cpp @@ -0,0 +1,57 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <iostream> + +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/operator.hpp> + +namespace compute = boost::compute; + +// this example demonstrates how to use Boost.Compute's STL +// implementation to add two vectors on the GPU +int main() +{ + // setup input arrays + float a[] = { 1, 2, 3, 4 }; + float b[] = { 5, 6, 7, 8 }; + + // make space for the output + float c[] = { 0, 0, 0, 0 }; + + // create vectors and transfer data for the input arrays 'a' and 'b' + compute::vector<float> vector_a(a, a + 4); + compute::vector<float> vector_b(b, b + 4); + + // create vector for the output array + compute::vector<float> vector_c(4); + + // add the vectors together + compute::transform( + vector_a.begin(), + vector_a.end(), + vector_b.begin(), + vector_c.begin(), + compute::plus<float>() + ); + + // transfer results back to the host array 'c' + compute::copy(vector_c.begin(), vector_c.end(), c); + + // print out results in 'c' + std::cout << "c: [" << c[0] << ", " + << c[1] << ", " + << c[2] << ", " + << c[3] << "]" << std::endl; + + return 0; +} |