Diffstat (limited to 'ml/dlib/dlib/dnn/tensor_tools.cpp')
-rw-r--r--   ml/dlib/dlib/dnn/tensor_tools.cpp   985
1 file changed, 985 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/dnn/tensor_tools.cpp b/ml/dlib/dlib/dnn/tensor_tools.cpp
new file mode 100644
index 00000000..c0f7fd69
--- /dev/null
+++ b/ml/dlib/dlib/dnn/tensor_tools.cpp
@@ -0,0 +1,985 @@
+// Copyright (C) 2015  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_TeNSOR_TOOLS_CPP_
+#define DLIB_TeNSOR_TOOLS_CPP_
+
+#include "tensor_tools.h"
+#include "../string.h"
+#include <atomic>
+
+namespace dlib
+{
+    namespace
+    {
+        std::atomic<bool>& dnn_prefer_fastest_algo (
+        )
+        {
+            static std::atomic<bool> var(true);
+            return var;
+        }
+    }
+
+    bool dnn_prefer_fastest_algorithms (
+    )
+    {
+        return dnn_prefer_fastest_algo();
+    }
+
+    void set_dnn_prefer_fastest_algorithms(
+    )
+    {
+        dnn_prefer_fastest_algo() = true;
+    }
+
+    void set_dnn_prefer_smallest_algorithms(
+    )
+    {
+        dnn_prefer_fastest_algo() = false;
+    }
+}
+
+namespace dlib { namespace tt
+{
+
+// ----------------------------------------------------------------------------------------
+
+    void inverse_norms (
+        resizable_tensor& invnorms,
+        const tensor& data,
+        const double eps
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::inverse_norms(invnorms, data, eps);
+#else
+        invnorms = reciprocal(sqrt(sum_cols(squared(mat(data))) + eps));
+#endif
+    }
+
+    void dot_prods (
+        resizable_tensor& out,
+        const tensor& lhs,
+        const tensor& rhs
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::dot_prods(out, lhs, rhs);
+#else
+        out = sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
+#endif
+    }
+
+    void dot_prods (
+        bool add_to,
+        tensor& out,
+        const tensor& lhs,
+        const tensor& rhs
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::dot_prods(add_to, out, lhs, rhs);
+#else
+        if (add_to)
+            out += sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
+        else
+            out = sum_cols(pointwise_multiply(mat(lhs), mat(rhs)));
+#endif
+    }
+
+    void scale_columns (
+        tensor& out,
+        const tensor& m,
+        const tensor& v
+    )
+    {
+        DLIB_CASSERT(have_same_dimensions(out,m));
+        DLIB_CASSERT(is_vector(v));
+        if (m.size() == 0 && v.size() == 0)
+            return;
+        DLIB_CASSERT(m.size() != 0);
+        DLIB_CASSERT(m.size()/m.num_samples() == v.size());
+
+#ifdef DLIB_USE_CUDA
+        cuda::scale_columns(out, m, v);
+#else
+        DLIB_CASSERT(false, "shouldn't be called right now");
+        out = scale_columns(mat(m), mat(v));
+#endif
+    }
+
+    void scale_rows (
+        tensor& out,
+        const tensor& m,
+        const tensor& v
+    )
+    {
+        DLIB_CASSERT(have_same_dimensions(out,m));
+        DLIB_CASSERT(is_vector(v));
+        if (m.size() == 0 && v.size() == 0)
+            return;
+        DLIB_CASSERT(m.size() != 0);
+        DLIB_CASSERT(m.num_samples() == v.size());
+
+#ifdef DLIB_USE_CUDA
+        cuda::scale_rows(out, m, v);
+#else
+        out = scale_rows(mat(m), mat(v));
+#endif
+    }
+
+    void scale_rows2 (
+        float beta,
+        tensor& out,
+        const tensor& m1,
+        const tensor& m2,
+        const tensor& v1,
+        const tensor& v2
+    )
+    {
+        DLIB_CASSERT(have_same_dimensions(out,m1));
+        DLIB_CASSERT(have_same_dimensions(out,m2));
+        DLIB_CASSERT(have_same_dimensions(v1,v2));
+        DLIB_CASSERT(is_vector(mat(v1)));
+        DLIB_CASSERT(v1.size() == m1.num_samples());
+
+#ifdef DLIB_USE_CUDA
+        cuda::scale_rows2(beta, out, m1, m2, v1, v2);
+#else
+        if (beta == 0)
+            out = scale_rows(mat(m1) - scale_rows(mat(m2),mat(v1)), mat(v2));
+        else
+            out = beta*mat(out) + scale_rows(mat(m1) - scale_rows(mat(m2),mat(v1)), mat(v2));
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void exp (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+        DLIB_CASSERT(dest.size() == src.size());
+
+#ifdef DLIB_USE_CUDA
+        cuda::exp(dest,src);
+#else
+        dest = exp(mat(src));
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void log (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+        DLIB_CASSERT(dest.size() == src.size());
+
+#ifdef DLIB_USE_CUDA
+        cuda::log(dest,src);
+#else
+        dest = log(mat(src));
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void log10 (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+        DLIB_CASSERT(dest.size() == src.size());
+
+#ifdef DLIB_USE_CUDA
+        cuda::log10(dest,src);
+#else
+        dest = log10(mat(src));
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void gemm (
+        float beta,
+        tensor& dest,
+        float alpha,
+        const tensor& lhs,
+        bool trans_lhs,
+        const tensor& rhs,
+        bool trans_rhs
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::gemm(beta, dest, alpha, lhs, trans_lhs, rhs, trans_rhs);
+#else
+        if (beta != 0)
+        {
+            if (trans_lhs && trans_rhs)
+                dest = alpha*trans(mat(lhs))*trans(mat(rhs)) + beta*mat(dest);
+            else if (!trans_lhs && trans_rhs)
+                dest = alpha*mat(lhs)*trans(mat(rhs)) + beta*mat(dest);
+            else if (trans_lhs && !trans_rhs)
+                dest = alpha*trans(mat(lhs))*mat(rhs) + beta*mat(dest);
+            else
+                dest = alpha*mat(lhs)*mat(rhs) + beta*mat(dest);
+        }
+        else
+        {
+            if (trans_lhs && trans_rhs)
+                dest = alpha*trans(mat(lhs))*trans(mat(rhs));
+            else if (!trans_lhs && trans_rhs)
+                dest = alpha*mat(lhs)*trans(mat(rhs));
+            else if (trans_lhs && !trans_rhs)
+                dest = alpha*trans(mat(lhs))*mat(rhs);
+            else
+                dest = alpha*mat(lhs)*mat(rhs);
+        }
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    tensor_rand::
+    tensor_rand(
+        unsigned long long seed
+    )
+#ifdef DLIB_USE_CUDA
+    :rnd(seed){}
+#else
+    {rnd.set_seed(cast_to_string(seed)); }
+#endif
+
+    void tensor_rand::
+    fill_gaussian (
+        tensor& data,
+        float mean,
+        float stddev
+    )
+    {
+        DLIB_CASSERT(data.size()%2 == 0);
+#ifdef DLIB_USE_CUDA
+        rnd.fill_gaussian(data, mean, stddev);
+#else
+        for (auto& x : data)
+            x = rnd.get_random_gaussian()*stddev + mean;
+#endif
+    }
+
+    void tensor_rand::
+    fill_uniform (
+        tensor& data
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        rnd.fill_uniform(data);
+#else
+        for (auto& x : data)
+            x = rnd.get_random_float();
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    void multiply (
+        bool add_to,
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2
+    )
+    {
+        DLIB_CASSERT(dest.k() == src1.k() && src1.k() == src2.k() &&
+            dest.nr() == src1.nr() && src1.nr() == src2.nr() &&
+            dest.nc() == src1.nc() && src1.nc() == src2.nc() );
+        const long MD = std::max(std::max(dest.num_samples(),src1.num_samples()),src2.num_samples());
+        DLIB_CASSERT((dest.num_samples()==1 || dest.num_samples()==MD) &&
+            (src1.num_samples()==1 || src1.num_samples()==MD) &&
+            (src2.num_samples()==1 || src2.num_samples()==MD) );
+#ifdef DLIB_USE_CUDA
+        cuda::multiply(add_to, dest, src1, src2);
+#else
+        cpu::multiply(add_to, dest, src1, src2);
+#endif
+
+    }
+
+    void scale_channels (
+        bool add_to,
+        tensor& dest,
+        const tensor& src,
+        const tensor& scales
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::scale_channels(add_to, dest, src, scales);
+#else
+        cpu::scale_channels(add_to, dest, src, scales);
+#endif
+    }
+
+    void multiply_conv (
+        bool add_to,
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::multiply_conv(add_to, dest, src1, src2);
+#else
+        cpu::multiply_conv(add_to, dest, src1, src2);
+#endif
+    }
+
+    void multiply_zero_padded (
+        bool add_to,
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::multiply_zero_padded(add_to, dest, src1, src2);
+#else
+        cpu::multiply_zero_padded(add_to, dest, src1, src2);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src,
+        const float A,
+        const float B
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(dest,src,A,B);
+#else
+        cpu::affine_transform(dest,src,A,B);
+#endif
+    }
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src,
+        const float A
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(dest,src,A);
+#else
+        cpu::affine_transform(dest,src,A,0);
+#endif
+    }
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2,
+        const float A,
+        const float B,
+        const float C
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(dest,src1,src2,A,B,C);
+#else
+        cpu::affine_transform(dest,src1,src2,A,B,C);
+#endif
+    }
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2,
+        const float A,
+        const float B
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(dest,src1,src2,A,B);
+#else
+        cpu::affine_transform(dest,src1,src2,A,B,0);
+#endif
+    }
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2,
+        const tensor& src3,
+        const float A,
+        const float B,
+        const float C,
+        const float D
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(dest,src1,src2,src3,A,B,C,D);
+#else
+        cpu::affine_transform(dest,src1,src2,src3,A,B,C,D);
+#endif
+    }
+
+    void affine_transform_range(
+        size_t begin,
+        size_t end,
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2,
+        const tensor& src3,
+        const float A,
+        const float B,
+        const float C
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
+#else
+        cpu::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
+#endif
+    }
+
+    void affine_transform(
+        const rectangle& rect,
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2,
+        const tensor& src3,
+        float A,
+        float B,
+        float C
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(rect, dest,src1,src2,src3,A,B,C);
+#else
+        cpu::affine_transform(rect, dest,src1,src2,src3,A,B,C);
+#endif
+    }
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2,
+        const tensor& src3,
+        const float A,
+        const float B,
+        const float C
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform_range(0,dest.size(),dest,src1,src2,src3,A,B,C);
+#else
+        cpu::affine_transform_range(0,dest.size(),dest,src1,src2,src3,A,B,C);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void affine_transform(
+        tensor& dest,
+        const tensor& src,
+        const tensor& A,
+        const tensor& B
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform(dest,src,A,B);
+#else
+        cpu::affine_transform(dest,src,A,B);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void affine_transform_conv(
+        tensor& dest,
+        const tensor& src,
+        const tensor& A,
+        const tensor& B
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::affine_transform_conv(dest,src,A,B);
+#else
+        cpu::affine_transform_conv(dest,src,A,B);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void compute_adam_update (
+        size_t begin,
+        size_t end,
+        tensor& s,
+        tensor& m,
+        tensor& v,
+        const float t,
+        const float learning_rate,
+        const float weight_decay,
+        const float momentum1,
+        const float momentum2,
+        const tensor& params,
+        const tensor& params_grad
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::compute_adam_update(begin, end, s, m, v, t, learning_rate, weight_decay, momentum1,
+            momentum2, params, params_grad);
+#else
+        cpu::compute_adam_update(begin, end, s, m, v, t, learning_rate, weight_decay, momentum1,
+            momentum2, params, params_grad);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void batch_normalize_inference (
+        const double eps,
+        resizable_tensor& dest,
+        const tensor& src,
+        const tensor& gamma,
+        const tensor& beta,
+        const tensor& running_means,
+        const tensor& running_variances
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::batch_normalize_inference(eps,dest,src,gamma,beta,running_means,running_variances);
+#else
+        cpu::batch_normalize_inference(eps,dest,src,gamma,beta,running_means,running_variances);
+#endif
+    }
+
+    void batch_normalize (
+        const double eps,
+        resizable_tensor& dest,
+        resizable_tensor& means,
+        resizable_tensor& vars,
+        const double averaging_factor,
+        resizable_tensor& running_means,
+        resizable_tensor& running_variances,
+        const tensor& src,
+        const tensor& gamma,
+        const tensor& beta
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::batch_normalize(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+#else
+        cpu::batch_normalize(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+#endif
+    }
+
+    void batch_normalize_gradient (
+        const double eps,
+        const tensor& gradient_input,
+        const tensor& means,
+        const tensor& invstds,
+        const tensor& src,
+        const tensor& gamma,
+        tensor& src_grad,
+        tensor& gamma_grad,
+        tensor& beta_grad
+    )
+    {
+
+#ifdef DLIB_USE_CUDA
+        cuda::batch_normalize_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+#else
+        cpu::batch_normalize_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void batch_normalize_conv_inference (
+        const double eps,
+        resizable_tensor& dest,
+        const tensor& src,
+        const tensor& gamma,
+        const tensor& beta,
+        const tensor& running_means,
+        const tensor& running_variances
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::batch_normalize_conv_inference(eps,dest,src,gamma,beta,running_means,running_variances);
+#else
+        cpu::batch_normalize_conv_inference(eps,dest,src,gamma,beta,running_means,running_variances);
+#endif
+    }
+
+    void batch_normalize_conv (
+        const double eps,
+        resizable_tensor& dest,
+        resizable_tensor& means,
+        resizable_tensor& vars,
+        const double averaging_factor,
+        resizable_tensor& running_means,
+        resizable_tensor& running_variances,
+        const tensor& src,
+        const tensor& gamma,
+        const tensor& beta
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::batch_normalize_conv(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+#else
+        cpu::batch_normalize_conv(eps,dest,means,vars,averaging_factor,running_means,running_variances,src,gamma,beta);
+#endif
+    }
+
+    void batch_normalize_conv_gradient (
+        const double eps,
+        const tensor& gradient_input,
+        const tensor& means,
+        const tensor& invstds,
+        const tensor& src,
+        const tensor& gamma,
+        tensor& src_grad,
+        tensor& gamma_grad,
+        tensor& beta_grad
+    )
+    {
+
+#ifdef DLIB_USE_CUDA
+        cuda::batch_normalize_conv_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+#else
+        cpu::batch_normalize_conv_gradient(eps,gradient_input, means, invstds, src, gamma, src_grad, gamma_grad, beta_grad);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void threshold (
+        tensor& data,
+        float thresh
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::threshold(data,thresh);
+#else
+        cpu::threshold(data,thresh);
+#endif
+    }
+
+    void dot (
+        const tensor& a,
+        const tensor& b,
+        tensor& result,
+        size_t idx
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::dot(a,b,result,idx);
+#else
+        cpu::dot(a,b,result,idx);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void add(
+        float beta,
+        tensor& dest,
+        float alpha,
+        const tensor& src
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::add(beta,dest,alpha,src);
+#else
+        cpu::add(beta,dest,alpha,src);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void add (
+        tensor& dest,
+        const tensor& src1,
+        const tensor& src2
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::add(dest, src1, src2);
+#else
+        cpu::add(dest, src1, src2);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void assign_conv_bias_gradient (
+        tensor& grad,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::assign_conv_bias_gradient(grad,gradient_input);
+#else
+        cpu::assign_conv_bias_gradient(grad,gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void assign_bias_gradient (
+        tensor& grad,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::assign_bias_gradient(grad,gradient_input);
+#else
+        cpu::assign_bias_gradient(grad,gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    void softmax (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::softmax(dest,src);
+#else
+        cpu::softmax(dest,src);
+#endif
+    }
+
+    void softmax_gradient (
+        tensor& grad,
+        const tensor& dest,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::softmax_gradient(grad, dest, gradient_input);
+#else
+        cpu::softmax_gradient(grad, dest, gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void softmax_all (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::softmax_all(dest,src);
+#else
+        cpu::softmax_all(dest,src);
+#endif
+    }
+
+    void softmax_all_gradient (
+        tensor& grad,
+        const tensor& dest,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::softmax_all_gradient(grad, dest, gradient_input);
+#else
+        cpu::softmax_all_gradient(grad, dest, gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void sigmoid (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::sigmoid(dest,src);
+#else
+        cpu::sigmoid(dest,src);
+#endif
+    }
+
+    void sigmoid_gradient (
+        tensor& grad,
+        const tensor& dest,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::sigmoid_gradient(grad, dest, gradient_input);
+#else
+        cpu::sigmoid_gradient(grad, dest, gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void relu (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::relu(dest,src);
+#else
+        cpu::relu(dest,src);
+#endif
+    }
+
+    void relu_gradient (
+        tensor& grad,
+        const tensor& dest,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::relu_gradient(grad, dest, gradient_input);
+#else
+        cpu::relu_gradient(grad, dest, gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void prelu (
+        tensor& dest,
+        const tensor& src,
+        const tensor& param
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::prelu(dest, src, param);
+#else
+        cpu::prelu(dest, src, param);
+#endif
+    }
+
+    void prelu_gradient (
+        tensor& grad,
+        const tensor& src,
+        const tensor& gradient_input,
+        const tensor& param,
+        tensor& params_grad
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::prelu_gradient(grad, src, gradient_input, param, params_grad);
+#else
+        cpu::prelu_gradient(grad, src, gradient_input, param, params_grad);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void tanh (
+        tensor& dest,
+        const tensor& src
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::tanh(dest,src);
+#else
+        cpu::tanh(dest,src);
+#endif
+    }
+
+    void tanh_gradient (
+        tensor& grad,
+        const tensor& dest,
+        const tensor& gradient_input
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::tanh_gradient(grad, dest, gradient_input);
+#else
+        cpu::tanh_gradient(grad, dest, gradient_input);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void resize_bilinear (
+        tensor& dest,
+        long dest_row_stride,
+        long dest_channel_stride,
+        const tensor& src,
+        long src_row_stride,
+        long src_channel_stride
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::resize_bilinear(dest,dest_row_stride,dest_channel_stride, src,src_row_stride,src_channel_stride);
+#else
+        cpu::resize_bilinear(dest,dest_row_stride,dest_channel_stride, src,src_row_stride,src_channel_stride);
+#endif
+    }
+
+    void resize_bilinear_gradient (
+        tensor& grad,
+        long grad_row_stride,
+        long grad_channel_stride,
+        const tensor& gradient_input,
+        long gradient_input_row_stride,
+        long gradient_input_channel_stride
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::resize_bilinear_gradient(grad,grad_row_stride,grad_channel_stride, gradient_input,gradient_input_row_stride,gradient_input_channel_stride);
+#else
+        cpu::resize_bilinear_gradient(grad,grad_row_stride,grad_channel_stride, gradient_input,gradient_input_row_stride,gradient_input_channel_stride);
+#endif
+    }
+
+// ------------------------------------------------------------------------------------
+
+    void copy_tensor(
+        bool add_to,
+        tensor& dest,
+        size_t dest_k_offset,
+        const tensor& src,
+        size_t src_k_offset,
+        size_t count_k
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        cuda::copy_tensor(add_to, dest, dest_k_offset, src, src_k_offset, count_k);
+#else
+        cpu::copy_tensor(add_to, dest, dest_k_offset, src, src_k_offset, count_k);
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void inv::
+    operator() (
+        const tensor& m,
+        resizable_tensor& out
+    )
+    {
+#ifdef DLIB_USE_CUDA
+        finv(m,out);
+#else
+        out = dlib::inv(mat(m));
+#endif
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}}
+
+#endif // DLIB_TeNSOR_TOOLS_CPP_
+
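
Note (not part of the patch): every tt:: routine in this file follows the same dispatch pattern, forwarding to cuda:: when DLIB_USE_CUDA is defined and to cpu:: or a dlib matrix expression otherwise, so calling code is identical on both backends. Below is a minimal usage sketch under the assumption of a dlib build with the dnn module available; the file name and tensor shapes are illustrative only.

// sketch: tensor_tools_usage_example.cpp (illustrative, not part of the commit)
#include <dlib/dnn.h>
#include <iostream>

int main()
{
    using namespace dlib;

    // tt::gemm treats each tensor as a num_samples x (k*nr*nc) matrix,
    // so these behave as 2x3, 3x4, and 2x4 matrices respectively.
    resizable_tensor a(2,3), b(3,4), c(2,4);

    // The tensor_rand defined above: a CUDA RNG on CUDA builds, dlib::rand otherwise.
    tt::tensor_rand rnd(0);
    rnd.fill_gaussian(a, 0, 1);
    rnd.fill_gaussian(b, 0, 1);

    // c = 1*a*b + 0*c; dispatches to cuda::gemm or to the mat() expression above.
    tt::gemm(0, c, 1, a, false, b, false);

    std::cout << mat(c) << std::endl;
    return 0;
}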