diff options
Diffstat (limited to 'ml/dlib/dlib/dnn/tensor_tools.h')
-rw-r--r-- | ml/dlib/dlib/dnn/tensor_tools.h | 1711 |
1 files changed, 0 insertions, 1711 deletions
diff --git a/ml/dlib/dlib/dnn/tensor_tools.h b/ml/dlib/dlib/dnn/tensor_tools.h deleted file mode 100644 index 9ba3154e5..000000000 --- a/ml/dlib/dlib/dnn/tensor_tools.h +++ /dev/null @@ -1,1711 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_TeNSOR_TOOLS_H_ -#define DLIB_TeNSOR_TOOLS_H_ - -#include "tensor.h" -#include "cudnn_dlibapi.h" -#include "cublas_dlibapi.h" -#include "cusolver_dlibapi.h" -#include "curand_dlibapi.h" -#include "cpu_dlib.h" -#include "cuda_dlib.h" -#include "../rand.h" -#include <memory> -#include "../geometry/rectangle.h" -#include "../test_for_odr_violations.h" - -namespace dlib -{ - bool dnn_prefer_fastest_algorithms(); - void set_dnn_prefer_fastest_algorithms(); - void set_dnn_prefer_smallest_algorithms(); -} - -namespace dlib { namespace tt -{ - -// ---------------------------------------------------------------------------------------- - - void inverse_norms ( - resizable_tensor& invnorms, - const tensor& data, - const double eps - ); - /*! - ensures - - #invnorms == reciprocal(sqrt(sum_cols(squared(mat(data))) + eps)) - !*/ - - void dot_prods ( - resizable_tensor& out, - const tensor& lhs, - const tensor& rhs - ); - /*! - requires - - have_same_dimensions(lhs,rhs) == true - ensures - - #out.num_samples() == lhs.num_samples() - - #out.k() == #out.nr() == #out.nc() == 1 - - #out == sum_cols(pointwise_multiply(mat(lhs), mat(rhs))); - !*/ - - void dot_prods ( - bool add_to, - tensor& out, - const tensor& lhs, - const tensor& rhs - ); - /*! - requires - - have_same_dimensions(lhs,rhs) == true - - out.size() == lhs.num_samples() - - out.k() == out.nr() == out.nc() == 1 - ensures - - if (add_to) then - - #out == mat(out) + sum_cols(pointwise_multiply(mat(lhs), mat(rhs))); - - else - - #out == sum_cols(pointwise_multiply(mat(lhs), mat(rhs))); - !*/ - - void scale_columns ( - tensor& out, - const tensor& m, - const tensor& v - ); - /*! 
- requires - - have_same_dimensions(out,m) == true - - is_vector(v) == true - - v.size() == mat(m).nc() - ensures - - performs: out = scale_columns(mat(m),mat(v)); - !*/ - - void scale_rows ( - tensor& out, - const tensor& m, - const tensor& v - ); - /*! - requires - - have_same_dimensions(out,m) == true - - is_vector(v) == true - - v.size() == m.num_samples() - ensures - - performs: out = scale_rows(mat(m),mat(v)); - !*/ - - void scale_rows2 ( - float beta, - tensor& out, - const tensor& m1, - const tensor& m2, - const tensor& v1, - const tensor& v2 - ); - /*! - requires - - have_same_dimensions(out,m1) == true - - have_same_dimensions(out,m2) == true - - have_same_dimensions(v1,v2) == true - - is_vector(v1) == true - - v1.size() == m1.num_samples() - ensures - - performs: - out = beta*out + scale_rows(mat(m1) - scale_rows(mat(m2),mat(v1)), mat(v2)); - !*/ - -// ---------------------------------------------------------------------------------------- - - void exp ( - tensor& dest, - const tensor& src - ); - /*! - requires - - dest.size() == src.size() - ensures - - performs: dest = exp(mat(src)) - !*/ - -// ---------------------------------------------------------------------------------------- - - void log ( - tensor& dest, - const tensor& src - ); - /*! - requires - - dest.size() == src.size() - ensures - - performs: dest = log(mat(src)) - !*/ - -// ---------------------------------------------------------------------------------------- - - void log10 ( - tensor& dest, - const tensor& src - ); - /*! - requires - - dest.size() == src.size() - ensures - - performs: dest = log10(mat(src)) - !*/ - -// ---------------------------------------------------------------------------------------- - - void gemm ( - float beta, - tensor& dest, - float alpha, - const tensor& lhs, - bool trans_lhs, - const tensor& rhs, - bool trans_rhs - ); - /*! 
- requires - - dest does not alias the memory of lhs or rhs - - The dimensions of lhs and rhs must be compatible for matrix multiplication. - In particular: - - Let L == trans_lhs ? trans(mat(lhs)) : mat(lhs) - - Let R == trans_rhs ? trans(mat(rhs)) : mat(rhs) - - Let D == mat(dest) - - D.nr() == L.nr() && D.nc() == R.nc() - (i.e. dest must be preallocated and have the correct output dimensions) - - L.nc() == R.nr() - ensures - - performs: dest = alpha*L*R + beta*mat(dest) - !*/ - -// ---------------------------------------------------------------------------------------- - - class inv - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a functor for doing matrix inversion on the GPU. The only - reason it's an object is to avoid the reallocation of some GPU memory - blocks if you want to do a bunch of matrix inversions in a row. - !*/ - public: - - void operator() ( - const tensor& m, - resizable_tensor& out - ); - /*! - requires - - m.size() == m.num_samples()*m.num_samples() - (i.e. mat(m) must be a square matrix) - ensures - - out == inv(mat(m)); - !*/ - - private: -#ifdef DLIB_USE_CUDA - cuda::inv finv; -#endif - }; - -// ---------------------------------------------------------------------------------------- - - class tensor_rand - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a tool for filling a tensor with random numbers. - - Note that the sequence of random numbers output by this object is different - when dlib is compiled with DLIB_USE_CUDA. So you should not write code - that depends on any specific sequence of numbers coming out of a - tensor_rand. - - !*/ - - public: - // not copyable - tensor_rand(const tensor_rand&) = delete; - tensor_rand& operator=(const tensor_rand&) = delete; - - tensor_rand() : tensor_rand(0) {} - tensor_rand(unsigned long long seed); - - void fill_gaussian ( - tensor& data, - float mean = 0, - float stddev = 1 - ); - /*! 
- requires - - data.size()%2 == 0 - ensures - - Fills data with random numbers drawn from a Gaussian distribution - with the given mean and standard deviation. - !*/ - - void fill_uniform ( - tensor& data - ); - /*! - ensures - - Fills data with uniform random numbers in the range (0.0, 1.0]. - !*/ - -#ifdef DLIB_USE_CUDA - cuda::curand_generator rnd; -#else - dlib::rand rnd; -#endif - }; - -// ---------------------------------------------------------------------------------------- - - void multiply ( - bool add_to, - tensor& dest, - const tensor& src1, - const tensor& src2 - ); - /*! - requires - - dest.k() == src1.k() == src2.k() - - dest.nr() == src1.nr() == src2.nr() - - dest.nc() == src1.nc() == src2.nc() - - dest.num_samples(), src1.num_samples(), and src2.num_samples() must each - either be 1 or whichever ones aren't equal to 1 must have the same values. - ensures - - let MD = max(dest.num_samples(), src1.num_samples(), src2.num_samples) - - This function pointwise multiplies src1 with src2 and stores the result into - #dest. However, how the multiplication happens depends on the dimensions of - the tensors. First, when src1 and src2 are multiplied together, if either - has a num_samples() dimension that is != MD, then it is first replicated to - produce a tensor with num_samples()==MD dimensions and then they are - pointwise multiplied together. - - Second, if dest.num_samples()==1, then after the pointwise multiplication of - src1 with src2, the result has its samples summed to produce an output tensor - with num_samples()==1 which is then assigned to #dest. - - if (add_to) then - - Instead of assigning the result to dest, this function adds the result to dest. - !*/ - - void scale_channels ( - bool add_to, - tensor& dest, - const tensor& src, - const tensor& scales - ); - /*! 
- requires - - have_same_dimensions(dest, src) == true - - scales.num_samples() == src.num_samples() - - scales.k() == src.k() - - scales.nr() == 1 - - scales.nc() == 1 - ensures - - Scales each channel of src by the corresponding value in scales. To be - precise, we will have: - - #dest(n,k,r,c) == src(n,k,r,c)*scales(n,k,1,1) - - if (add_to) then - - Instead of assigning the result to dest, this function adds the result to dest. - !*/ - - void multiply_conv ( - bool add_to, - tensor& dest, - const tensor& src1, - const tensor& src2 - ); - /*! - requires - - if (have_same_dimensions(dest, src1) == true) then - - src2.num_samples() == 1 - - src2.nr() == 1 - - src2.nc() == 1 - - src2.k() == src1.k() - - else - - have_same_dimensions(src1, src2) == true) - - dest.num_samples() == 1 - - dest.nr() == 1 - - dest.nc() == 1 - - dest.k() == src1.k() - ensures - - Performs #dest == src1*src2 - In particular, if the elements of dest, src1, and src2 were indexed by (n,k,r,c) then - we would have: - - if (have_same_dimensions(dest,src1)) then - #dest(n,k,r,c) == src1(n,k,r,c)*src2(k) - - else - #dest(k) == sum over {n,r,c} of src1(n,k,r,c)*src2(n,k,r,c) - - if (add_to) then - - Instead of assigning the result to dest, this function adds the result to dest. - !*/ - - void multiply_zero_padded ( - bool add_to, - tensor& dest, - const tensor& src1, - const tensor& src2 - ); - /*! - ensures - - if (add_to) then - - performs: dest += src1 * src2 - - else - - performs: dest = src1 * src2 - - In either case, the multiplication happens pointwise according to 4D tensor - arithmetic. If the dimensions don't match then missing elements are presumed - to be equal to 0. - !*/ - -// ---------------------------------------------------------------------------------------- - - void affine_transform( - tensor& dest, - const tensor& src, - const float A, - const float B - ); - /*! 
- requires - - dest.size()==src.size() - ensures - - #dest == A*src + B - !*/ - - void affine_transform( - tensor& dest, - const tensor& src, - const float A - ); - /*! - requires - - dest.size()==src.size() - ensures - - #dest == A*src - !*/ - - void affine_transform( - tensor& dest, - const tensor& src1, - const tensor& src2, - const float A, - const float B, - const float C - ); - /*! - requires - - dest.size()==src1.size() - - dest.size()==src2.size() - ensures - - #dest == A*src1 + B*src2 + C - !*/ - - void affine_transform( - tensor& dest, - const tensor& src1, - const tensor& src2, - const float A, - const float B - ); - /*! - requires - - dest.size()==src1.size() - - dest.size()==src2.size() - ensures - - #dest == A*src1 + B*src2 - !*/ - - void affine_transform( - tensor& dest, - const tensor& src1, - const tensor& src2, - const tensor& src3, - const float A, - const float B, - const float C, - const float D - ); - /*! - requires - - dest.size()==src1.size() - - dest.size()==src2.size() - - dest.size()==src3.size() - ensures - - #dest == A*src1 + B*src2 + C*src3 + D - !*/ - - void affine_transform( - tensor& dest, - const tensor& src1, - const tensor& src2, - const tensor& src3, - const float A, - const float B, - const float C - ); - /*! - requires - - dest.size()==src1.size() - - dest.size()==src2.size() - - dest.size()==src3.size() - ensures - - #dest == A*src1 + B*src2 + C*src3 - !*/ - - void affine_transform_range( - size_t begin, - size_t end, - tensor& dest, - const tensor& src1, - const tensor& src2, - const tensor& src3, - const float A, - const float B, - const float C - ); - /*! - requires - - dest.size()==src1.size() - - dest.size()==src2.size() - - dest.size()==src3.size() - - begin <= end <= dest.size() - ensures - - This function operates much like - affine_transform(dest,src1,src2,src3,A,B,C,0), except that it runs over only - the half open range [begin,end) rather than processing the entire tensor. 
- Specifically, it does this: - - for i in the range [begin, end): - - #dest.host()[i] == A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i] - !*/ - - void affine_transform( - const rectangle& rect, - tensor& dest, - const tensor& src1, - const tensor& src2, - const tensor& src3, - float A, - float B, - float C - ); - /*! - requires - - dest.size()==src1.size() - - dest.size()==src2.size() - - dest.size()==src3.size() - - dest.num_samples()==src1.num_samples() - - dest.num_samples()==src2.num_samples() - - dest.num_samples()==src3.num_samples() - - get_rect(mat(dest)).contains(rect) == true - (i.e. rect must be entirely contained within dest) - ensures - - This function operates much like - affine_transform(dest,src1,src2,src3,A,B,C,0), except that it runs over only - the sub-rectangle indicated by rect. In particular, this function is equivalent - to: - set_subm(dest,rect) = A*subm(mat(src1),rect) + B*subm(mat(src2),rect) + C*subm(mat(src3),rect) - !*/ - -// ---------------------------------------------------------------------------------------- - - void affine_transform( - tensor& dest, - const tensor& src, - const tensor& A, - const tensor& B - ); - /*! - requires - - have_same_dimensions(dest,src) == true - - if (A.num_samples() == 1) then - - B.num_samples() == 1 - - else - - A.num_samples() == src.num_samples() - - B.num_samples() == src.num_samples() - - A.nr() == B.nr() == src.nr() - - A.nc() == B.nc() == src.nc() - - A.k() == B.k() == src.k() - ensures - - if (A.num_samples() == 1) then - - #dest == A*src + B - (done for each sample in src) - - else - - for all valid i: - - #dest.host()[i] == A.host()[i]*src.host()[i] + B.host()[i] - !*/ - -// ---------------------------------------------------------------------------------------- - - void affine_transform_conv( - tensor& dest, - const tensor& src, - const tensor& A, - const tensor& B - ); - /*! 
- requires - - have_same_dimensions(dest,src) == true - - have_same_dimensions(A, B) == true - - A.num_samples() == 1 - - A.nr() == 1 - - A.nc() == 1 - - A.k() == src.k() - ensures - - Performs #dest == A*src + B - In particular, if the elements of dest and src were indexed by (n,k,r,c) then - we would have: - #dest(n,k,r,c) == A(k)*src(n,k,r,c) + B(k). - !*/ - -// ---------------------------------------------------------------------------------------- - - void compute_adam_update ( - size_t begin, - size_t end, - tensor& s, - tensor& m, - tensor& v, - const float t, - const float learning_rate, - const float weight_decay, - const float momentum1, - const float momentum2, - const tensor& params, - const tensor& params_grad - ); - /*! - requires - - s.size() == m.size() == v.size() == params.size() == params_grad.size() - - t > 0 - - learning_rate > 0 - - weight_decay >= 0 - - 0 <= momentum1 < 1 - - 0 <= momentum2 < 1 - - begin <= end <= params.size() - ensures - - This function implements the ADAM parameter update method described in the paper: - Kingma, Diederik P., and Jimmy Ba. "Adam: A method for stochastic - optimization." International Conference on Learning Representations. 2015. - Specifically, it implements the method shown as Algorithm 1. - - #s is the update vector that should be added to the parameters. - - The function only operates in the half open range [begin,end) of the memory - blocks of each tensor. E.g. to make this function run on the entire tensor - set begin to 0 and end to params.size(). - !*/ - -// ---------------------------------------------------------------------------------------- - - void batch_normalize_inference ( - const double eps, - resizable_tensor& dest, - const tensor& src, - const tensor& gamma, - const tensor& beta, - const tensor& running_means, - const tensor& running_variances - ); - /*! 
- requires - - eps > 0 - - gamma.num_samples() == 1 - - gamma.nr() == src.nr() - - gamma.nc() == src.nc() - - gamma.k() == src.k() - - have_same_dimensions(gamma, beta) - - have_same_dimensions(gamma, running_means) - - have_same_dimensions(gamma, running_variances) - ensures - - Linearly transforms src as a call to batch_normalize() would if src had means - and variances as given by running_means and running_variances. That is, this - function performs: - dest = gamma*(src-running_means)/sqrt(running_variances+eps) + beta - Note that it does it in a pointwise fashion over the samples in src. - !*/ - - void batch_normalize ( - const double eps, - resizable_tensor& dest, - resizable_tensor& means, - resizable_tensor& invstds, - const double averaging_factor, - resizable_tensor& running_means, - resizable_tensor& running_variances, - const tensor& src, - const tensor& gamma, - const tensor& beta - ); - /*! - requires - - eps > 0 - - src.num_samples() > 1 - - gamma.num_samples() == 1 - - beta.num_samples() == 1 - - gamma.nr() == beta.nr() == src.nr() - - gamma.nc() == beta.nc() == src.nc() - - gamma.k() == beta.k() == src.k() - - 0 <= averaging_factor <= 1 - - if (averaging_factor != 1) - - have_same_dimensions(running_means, means) == true - - have_same_dimensions(running_variances, invstds) == true - ensures - - have_same_dimensions(#dest, src) == true - - #means.num_samples() == 1 - - #invstds.num_samples() == 1 - - means.nr() == invstds.nr() == src.nr() - - means.nc() == invstds.nc() == src.nc() - - means.k() == invstds.k() == src.k() - - #dest == the batch normalized version of src. - - #means == the mean values of the contents of src. - - #invstds == 1/(the standard deviation values of the contents of src). 
- - #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means); - - #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src); - !*/ - - void batch_normalize_gradient ( - const double eps, - const tensor& gradient_input, - const tensor& means, - const tensor& invstds, - const tensor& src, - const tensor& gamma, - tensor& src_grad, - tensor& gamma_grad, - tensor& beta_grad - ); - /*! - requires - - eps > 0 - - invstds and means should be the output of a call to - batch_normalize(eps,dest,means,invstds,src,gamma,beta) - - have_same_dimensions(gradient_input, src) == true - - have_same_dimensions(src, src_grad) == true - - src.num_samples() > 1 - - gamma.num_samples() == 1 - - have_same_dimensions(gamma, gamma_grad) == true - - have_same_dimensions(gamma, beta_grad) == true - - gamma.nr() == src.nr() - - gamma.nc() == src.nc() - - gamma.k() == src.k() - - have_same_dimensions(means, gamma) == true - - have_same_dimensions(invstds, gamma) == true - ensures - - Let f(src,gamma,beta) == dot(gradient_input, dest output of - batch_normalize(eps,dest,means,invstds,src,gamma,beta)) - - Adds the gradient of f() with respect to src to #src_grad. - - Assigns the gradient of f() with respect to gamma to #gamma_grad. - - Assigns the gradient of f() with respect to beta to #beta_grad. - !*/ - -// ---------------------------------------------------------------------------------------- - - void batch_normalize_conv_inference ( - const double eps, - resizable_tensor& dest, - const tensor& src, - const tensor& gamma, - const tensor& beta, - const tensor& running_means, - const tensor& running_variances - ); - /*! 
- requires - - eps > 0 - - gamma.num_samples() == 1 - - gamma.nr() == 1 - - gamma.nc() == 1 - - gamma.k() == src.k() - - have_same_dimensions(gamma, beta) - - have_same_dimensions(gamma, running_means) - - have_same_dimensions(gamma, running_variances) - ensures - - Linearly transforms src as a call to batch_normalize_conv() would if src had - means and variances as given by running_means and running_variances. That - is, this function performs: - dest = gamma*(src-running_means)/sqrt(running_variances+eps) + beta - Note that it does this in a pointwise fashion over the samples, rows, and - columns in src. - !*/ - - void batch_normalize_conv ( - const double eps, - resizable_tensor& dest, - resizable_tensor& means, - resizable_tensor& invstds, - const double averaging_factor, - resizable_tensor& running_means, - resizable_tensor& running_variances, - const tensor& src, - const tensor& gamma, - const tensor& beta - ); - /*! - requires - - eps > 0 - - src.num_samples() > 1 - - gamma.num_samples()==gamma.nr()==gamma.nc() == 1 - - beta.num_samples() ==beta.nr() ==beta.nc() == 1 - - gamma.k() == beta.k() == src.k() - - 0 <= averaging_factor <= 1 - - if (averaging_factor != 1) - - have_same_dimensions(running_means, means) == true - - have_same_dimensions(running_variances, invstds) == true - ensures - - have_same_dimensions(#dest, src) == true - - #means.num_samples()==means.nr()==means.nc() == 1 - - #invstds.num_samples() ==invstds.nr() ==invstds.nc() == 1 - - means.k() == invstds.k() == src.k() - - #dest == the batch normalized version of src. - - #means == the mean values of the contents of src. - - #invstds == 1/(the standard deviation values of the contents of src). 
- - #running_means = (1-averaging_factor)*mat(#running_means) + averaging_factor*mat(#means); - - #running_variances = (1-averaging_factor)*mat(#running_variances) + averaging_factor*(variance of contents of src); - !*/ - - void batch_normalize_conv_gradient ( - const double eps, - const tensor& gradient_input, - const tensor& means, - const tensor& invstds, - const tensor& src, - const tensor& gamma, - tensor& src_grad, - tensor& gamma_grad, - tensor& beta_grad - ); - /*! - requires - - eps > 0 - - invstds and means should be the output of a call to - batch_normalize_conv(eps,dest,means,invstds,src,gamma,beta) - - have_same_dimensions(gradient_input, src) == true - - have_same_dimensions(src, src_grad) == true - - src.num_samples() > 1 - - gamma.num_samples()==gamma.nr()==gamma.nc() == 1 - - have_same_dimensions(gamma, gamma_grad) == true - - have_same_dimensions(gamma, beta_grad) == true - - gamma.k() == src.k() - - have_same_dimensions(means, gamma) == true - - have_same_dimensions(invstds, gamma) == true - ensures - - Let f(src,gamma,beta) == dot(gradient_input, dest output of - batch_normalize_conv(eps,dest,means,invstds,src,gamma,beta)) - - Adds the gradient of f() with respect to src to #src_grad. - - Assigns the gradient of f() with respect to gamma to #gamma_grad. - - Assigns the gradient of f() with respect to beta to #beta_grad. - !*/ - -// ----------------------------------------------------------------------------------- - - void threshold ( - tensor& data, - float thresh - ); - /*! - ensures - - Sets all elements of data to 1 or 0 depending on if they are above or below - the given threshold. Specifically, for all valid i: - - #data.host()[i] == data.host()[i]>thresh ? 1 : 0 - !*/ - - void dot ( - const tensor& a, - const tensor& b, - tensor& result, - size_t idx - ); - /*! - requires - - a.size() == b.size() - - idx < result.size() - ensures - - #result.host()[idx] == result.host()[idx] + dot(a,b); - I.e. 
Adds the dot product between a and b into the idx-th element of result. - The reason you might want to use this more complex version of dot() is - because, when using CUDA, it runs by generating asynchronous kernel launches - whereas the version of dot() that returns the result immediately as a scalar - must block the host while we wait for the result to be computed and then - transferred from the GPU to the host for return by dot(). So this version of - dot() might be much faster in some cases. - !*/ - -// ---------------------------------------------------------------------------------------- - - void add( - float beta, - tensor& dest, - float alpha, - const tensor& src - ); - /*! - requires - - One of the following is true: - - have_same_dimensions(src, dest) - - src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1 - - src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc() - - src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc() - - src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1 - - is_same_object(src,dest) == false - ensures - - performs: dest = beta*dest + alpha*src - However, how the addition happens depends on the dimensions of src. In - particular, this function adds the scaled values of one src tensor to dest. - Each dimension of the src tensor must match the corresponding dimension of - the dest tensor or must be equal to 1. In the latter case, the same value - from the src tensor, for those dimensions, will be used to add into the dest - tensor. - !*/ - -// ---------------------------------------------------------------------------------------- - - void add ( - tensor& dest, - const tensor& src1, - const tensor& src2 - ); - /*! - ensures - - performs: dest = src1 + src2 - The addition happens pointwise according to 4D tensor arithmetic. If the - dimensions don't match then missing elements are presumed to be equal to 0. 
- !*/ - -// ---------------------------------------------------------------------------------------- - - void assign_conv_bias_gradient ( - tensor& grad, - const tensor& gradient_input - ); - /*! - requires - - grad.num_samples() == 1 - - grad.k() >= 1 - - grad.nr() == 1 - - grad.nc() == 1 - - gradient_input.k() == grad.k() - - gradient_input.size() > 0 - - is_same_object(grad,gradient_input) == false - ensures - - let BIAS be a tensor with the same dimensions as grad. - - let OUT be the output of add(1,OUT,1,BIAS) - - let f(gradient_input,BIAS) == dot(gradient_input,OUT) - - Then this function computes the gradient of f() with respect to BIAS and - assigns it to grad. - !*/ - -// ---------------------------------------------------------------------------------------- - - void assign_bias_gradient ( - tensor& grad, - const tensor& gradient_input - ); - /*! - requires - - grad.num_samples() == 1 - - gradient_input.k() == grad.k() - - gradient_input.nr() == grad.nr() - - gradient_input.nc() == grad.nc() - - gradient_input.size() > 0 - - is_same_object(grad,gradient_input) == false - ensures - - let BIAS be a tensor with the same dimensions as grad. - - let OUT be the output of add(1,OUT,1,BIAS) - - let f(gradient_input,BIAS) == dot(gradient_input,OUT) - - Then this function computes the gradient of f() with respect to BIAS and - assigns it to grad. - !*/ - -// ---------------------------------------------------------------------------------------- - - class tensor_conv - { - public: - tensor_conv(const tensor_conv&) = delete; - tensor_conv& operator=(const tensor_conv&) = delete; - - tensor_conv() {} - - void clear( - ) { impl.clear(); } - - void operator() ( - const bool add_to_output, - tensor& output, - const tensor& data, - const tensor& filters - ) { impl(add_to_output,output,data,filters); } - /*! - requires - - setup() has been called. 
Specifically, setup() has been called like this: - this->setup(data, filters, stride_y, stride_x, padding_y, padding_x); - - is_same_object(output,data) == false - - is_same_object(output,filters) == false - - filters.k() == data.k() - - filters.nr() <= data.nr() + 2*padding_y - - filters.nc() <= data.nc() + 2*padding_x - - #output.num_samples() == data.num_samples() - - #output.k() == filters.num_samples() - - #output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y - - #output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x - ensures - - Convolves filters over data. If add_to_output==true then we add the - results to output, otherwise we assign to output, overwriting the - previous values in output. - - filters contains filters.num_samples() filters. - !*/ - - void operator() ( - const bool add_to_output, - resizable_tensor& output, - const tensor& data, - const tensor& filters - ) { impl(add_to_output,output,data,filters); } - /*! - requires - - setup() has been called. Specifically, setup() has been called like this: - this->setup(data, filters, stride_y, stride_x, padding_y, padding_x); - - is_same_object(output,data) == false - - is_same_object(output,filters) == false - - filters.k() == data.k() - - filters.nr() <= data.nr() + 2*padding_y - - filters.nc() <= data.nc() + 2*padding_x - ensures - - Convolves filters over data. If add_to_output==true then we add the - results to output, otherwise we assign to output, overwriting the - previous values in output. - - filters contains filters.num_samples() filters. 
- - #output.num_samples() == data.num_samples() - - #output.k() == filters.num_samples() - - #output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y - - #output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x - !*/ - - void get_gradient_for_data ( - const bool add_to_output, - const tensor& gradient_input, - const tensor& filters, - tensor& data_gradient - ) { impl.get_gradient_for_data(add_to_output,gradient_input,filters,data_gradient); } - /*! - requires - - One of the following must be true: - - filters has the same dimensions as the filters object given to the - last call to operator(). Also, data_gradient has the same dimensions - as the data object given to the last call to operator(). - - setup() has been called. Specifically, setup() has been called like this: - this->setup(data_gradient, filters, stride_y, stride_x, padding_y, padding_x); - - gradient_input has the following dimensions: - - gradient_input.num_samples() == data_gradient.num_samples() - - gradient_input.k() == filters.num_samples() - - gradient_input.nr() == 1+(data_gradient.nr() + 2*padding_y - filters.nr())/stride_y - - gradient_input.nc() == 1+(data_gradient.nc() + 2*padding_x - filters.nc())/stride_x - - NOTE, these dimensions are what you would obtain if gradient_input - has the same dimensions as the last output of operator(). - - is_same_object(data_gradient,filters) == false - - is_same_object(data_gradient,gradient_input) == false - ensures - - let OUT be the output of (*this)(OUT,data,filters,sx,sy). - - let f(data,filters) == dot(OUT, gradient_input) - - if (add_to_output) then - - This function finds the gradient of f() with respect to data and adds - this gradient to data_gradient. - - else - - This function finds the gradient of f() with respect to data and - assigns this gradient to data_gradient, overwriting the previous - values in data_gradient. 
- !*/ - - void get_gradient_for_filters ( - const bool add_to_output, - const tensor& gradient_input, - const tensor& data, - tensor& filters_gradient - ) { impl.get_gradient_for_filters(add_to_output,gradient_input,data,filters_gradient); } - /*! - requires - - One of the following must be true: - - filters_gradient has the same dimensions as the filters object given - to the last call to operator(). Also, data has the same dimensions - as the data object given to the last call to operator(). - - setup() has been called. Specifically, setup() has been called like this: - this->setup(data, filters_gradient, stride_y, stride_x, padding_y, padding_x); - - gradient_input has the following dimensions: - - gradient_input.num_samples() == data.num_samples() - - gradient_input.k() == filters.num_samples() - - gradient_input.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y - - gradient_input.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x - - NOTE, these dimensions are what you would obtain if gradient_input - has the same dimensions as the last output of operator(). - - is_same_object(filters_gradient,data) == false - - is_same_object(filters_gradient,gradient_input) == false - ensures - - let OUT be the output of (*this)(OUT,data,filters,sx,sy). - - let f(data,filters) == dot(OUT, gradient_input) - - if (add_to_output) then - - This function finds the gradient of f() with respect to filters and - adds this gradient to filters_gradient. - - else - - This function finds the gradient of f() with respect to filters and - assigns this gradient to filters_gradient, overwriting the previous - values in filters_gradient. - !*/ - - - void setup( - const tensor& data, - const tensor& filters, - int stride_y, - int stride_x, - int padding_y, - int padding_x - ) {impl.setup(data,filters,stride_y,stride_x,padding_y,padding_x); } - /*! 
- requires - - filters.k() == data.k() - - stride_y > 0 - - stride_x > 0 - - 0 <= padding_y < filters.nr() - - 0 <= padding_x < filters.nc() - ensures - - When operator() is called, the output tensor will have these dimensions: - - output.nr() == 1+(data.nr() + 2*padding_y - filters.nr())/stride_y - - output.nc() == 1+(data.nc() + 2*padding_x - filters.nc())/stride_x - - output.num_samples() == data.num_samples() - - output.k() == filters.num_samples() - - The point of setup() is to allow this object to gather information about - all the tensor sizes and filter layouts involved in the computation. In - particular, the reason the tensors are input into setup() is just to - observe their sizes. setup() doesn't do anything with the contents of - the tensors, or store any kind of references to the data or filter - tensors. - !*/ - - private: -#ifdef DLIB_USE_CUDA - cuda::tensor_conv impl; -#else - cpu::tensor_conv impl; -#endif - - }; - -// ---------------------------------------------------------------------------------------- - - class pooling - { - /*! - WHAT THIS OBJECT REPRESENTS - The pooling object is a tool for performing spatial pooling over a tensor. - It can be configured to do either max or average pooling. - !*/ - public: - - pooling(const pooling&) = delete; - pooling& operator=(const pooling&) = delete; - - pooling ( - ) = default; - - void clear( - ) { impl.clear(); } - - void setup_max_pooling( - int window_height, - int window_width, - int stride_y, - int stride_x, - int padding_y, - int padding_x - ) { impl.setup_max_pooling(window_height, window_width, stride_y, stride_x, padding_y, padding_x); } - /*! - requires - - window_height > 0 - - window_width > 0 - - stride_y > 0 - - stride_x > 0 - - 0 <= padding_y < window_height - - 0 <= padding_x < window_width - ensures - - When you call operator() it will do max pooling with the given - parameters. 
- !*/ - - void setup_avg_pooling( - int window_height, - int window_width, - int stride_y, - int stride_x, - int padding_y, - int padding_x - ) { impl.setup_avg_pooling(window_height, window_width, stride_y, stride_x, padding_y, padding_x); } - /*! - requires - - window_height > 0 - - window_width > 0 - - stride_y > 0 - - stride_x > 0 - - 0 <= padding_y < window_height - - 0 <= padding_x < window_width - ensures - - When you call operator() it will do average pooling with the given - parameters. - !*/ - - bool does_max_pooling( - ) const { return impl.does_max_pooling(); } - - void operator() ( - resizable_tensor& dest, - const tensor& src - ) { impl(dest, src); } - /*! - requires - - is_same_object(dest,src) == false - - either setup_max_pooling() or setup_avg_pooling() has been called. - - window_width <= src.nc() + 2*padding_x - - window_height <= src.nr() + 2*padding_y - ensures - - #dest.num_samples() == src.num_samples() - - #dest.k() == src.k() - - #dest.nr() == 1 + (src.nr() + 2*padding_y - window_height)/stride_y - - #dest.nc() == 1 + (src.nc() + 2*padding_x - window_width)/stride_x - - WINDOW == centered_rect(x*stride_x + window_width/2 - padding_x, - y*stride_y + window_height/2 - padding_y, - window_width, - window_height) - - for all valid s, k, r, and c: - - if (does_max_pooling()) then - - image_plane(#dest,s,k)(r,c) == max(subm_clipped(image_plane(src,s,k),WINDOW(c,r))) - - else - - image_plane(#dest,s,k)(r,c) == mean(subm_clipped(image_plane(src,s,k),WINDOW(c,r))) - !*/ - - void get_gradient( - const tensor& gradient_input, - const tensor& dest, - const tensor& src, - tensor& grad - ) { impl.get_gradient(gradient_input, dest, src, grad); } - /*! 
- requires - - have_same_dimensions(gradient_input,dest) == true - - have_same_dimensions(src,grad) == true - - dest contains the result of calling (*this)(dest,src) - - is_same_object(grad,gradient_input) == false - - is_same_object(grad,dest) == false - - is_same_object(grad,src) == false - ensures - - Recalling that dest is the output of (*this)(dest,src), - let f(src) == dot(gradient_input,dest) - - Then this function computes the gradient of f() with respect to src and - adds it to grad. - !*/ - - private: -#ifdef DLIB_USE_CUDA - cuda::pooling impl; -#else - cpu::pooling impl; -#endif - }; - -// ---------------------------------------------------------------------------------------- - - void softmax ( - tensor& dest, - const tensor& src - ); - /*! - requires - - have_same_dimensions(dest, src) == true - ensures - - Note that the softmax function is a vector valued function: - s(x) == exp(x)/sum(exp(x)) - - Computes the softmax function on src and writes the results to dest. The - softmax is computed per spatial location across the different channels at - each location. That is, softmax() outputs a new tensor, #dest, where each of - the spatial locations in dest (i.e. image idx, row idx, and column idx) - contains the output of s() evaluated over the channel values at each - location. - - This function supports in-place operation, i.e. having - is_same_object(dest, src)==true - !*/ - - void softmax_gradient ( - tensor& grad, - const tensor& dest, - const tensor& gradient_input - ); - /*! - requires - - have_same_dimensions(dest,gradient_input) == true - - have_same_dimensions(dest,grad) == true - ensures - - We interpret dest as the output of softmax(dest,SRC) for some SRC tensor. - Then let f(SRC) == dot(gradient_input,dest). Then this function computes the - gradient of f() with respect to SRC and stores it to grad. Moreover, if - is_same_object(grad,gradient_input)==true then the output is assigned to - grad, replacing its previous contents. 
Otherwise the output is added to - grad. - - This function supports in-place operation, i.e. having - is_same_object(grad, gradient_input)==true - !*/ - -// ---------------------------------------------------------------------------------------- - - void softmax_all ( - tensor& dest, - const tensor& src - ); - /*! - requires - - have_same_dimensions(dest, src) == true - ensures - - Note that the softmax function is a vector valued function: - s(x) == exp(x)/sum(exp(x)) - - Computes the softmax function on src and writes the results to dest. The - softmax is computed over the entire tensor with one invocation of s(). So - unlike softmax() which computes many s() evaluations, one for each spatial - location, softmax_all() calls s() once for the entire tensor. - - This function supports in-place operation, i.e. having - is_same_object(dest, src)==true - !*/ - - void softmax_all_gradient ( - tensor& grad, - const tensor& dest, - const tensor& gradient_input - ); - /*! - requires - - have_same_dimensions(dest,gradient_input) == true - - have_same_dimensions(dest,grad) == true - - is_same_object(grad, dest)==false - ensures - - We interpret dest as the output of softmax_all(dest,SRC) for some SRC tensor. - Then let f(SRC) == dot(gradient_input,dest) Then this function computes the - gradient of f() with respect to SRC and assigns it to grad. - - This function supports in-place operation, i.e. having - is_same_object(grad, gradient_input)==true - !*/ - -// ---------------------------------------------------------------------------------------- - - void sigmoid ( - tensor& dest, - const tensor& src - ); - /*! - requires - - have_same_dimensions(dest, src) == true - ensures - - for all valid i: - - #dest.host()[i] == 1/(1+std::exp(-src.host()[i])) - - This function supports in-place operation, i.e. having - is_same_object(dest, src)==true - !*/ - - void sigmoid_gradient ( - tensor& grad, - const tensor& dest, - const tensor& gradient_input - ); - /*! 
- requires - - have_same_dimensions(dest,gradient_input) == true - - have_same_dimensions(dest,grad) == true - ensures - - Recalling that dest is the output of sigmoid(dest,SRC) for some SRC tensor, - let f(SRC) == dot(gradient_input,dest). Then this function computes the - gradient of f() with respect to SRC and stores it to grad. Moreover, if - is_same_object(grad,gradient_input)==true then the output is assigned to - grad, replacing its previous contents. Otherwise the output is added to - grad. - - This function supports in-place operation, i.e. having - is_same_object(grad, gradient_input)==true - !*/ - -// ---------------------------------------------------------------------------------------- - - void relu ( - tensor& dest, - const tensor& src - ); - /*! - requires - - have_same_dimensions(dest, src) == true - ensures - - for all valid i: - - #dest.host()[i] == std::max(0,src.host()[i]) - - This function supports in-place operation, i.e. having - is_same_object(dest, src)==true - !*/ - - void relu_gradient ( - tensor& grad, - const tensor& dest, - const tensor& gradient_input - ); - /*! - requires - - have_same_dimensions(dest,gradient_input) == true - - have_same_dimensions(dest,grad) == true - ensures - - Recalling that dest is the output of relu(dest,SRC) for some SRC tensor, - let f(SRC) == dot(gradient_input,dest). Then this function computes the - gradient of f() with respect to SRC and stores it to grad. Moreover, if - is_same_object(grad,gradient_input)==true then the output is assigned to - grad, replacing its previous contents. Otherwise the output is added to - grad. - - This function supports in-place operation, i.e. having - is_same_object(grad, gradient_input)==true - !*/ - -// ---------------------------------------------------------------------------------------- - - void prelu ( - tensor& dest, - const tensor& src, - const tensor& param - ); - /*! 
- requires - - have_same_dimensions(dest, src) == true - - param.size() == 1 - ensures - - for all valid i: - - if (src.host()[i] > 0) then - - #dest.host()[i] == src.host()[i] - - else - - #dest.host()[i] == src.host()[i] * param.host()[0] - - This function supports in-place operation, i.e. having - is_same_object(dest, src)==true - !*/ - - void prelu_gradient ( - tensor& grad, - const tensor& src, - const tensor& gradient_input, - const tensor& param, - tensor& params_grad - ); - /*! - requires - - have_same_dimensions(grad,src) == true - - have_same_dimensions(grad,gradient_input) == true - - param.size() == 1 - - params_grad.size() == 1 - - is_same_object(grad, gradient_input) == false - ensures - - Recalling that dest is the output of prelu(dest,src,param) let - f(src,param) == dot(gradient_input,dest) - - Then this function computes the gradient of f() with respect to src and - param. It assigns the gradient with respect to param to #params_grad and - adds the gradient with respect to src to #grad. - !*/ - -// ---------------------------------------------------------------------------------------- - - void tanh ( - tensor& dest, - const tensor& src - ); - /*! - requires - - have_same_dimensions(dest, src) == true - ensures - - for all valid i: - - #dest.host()[i] == std::tanh(src.host()[i]) - - This function supports in-place operation, i.e. having - is_same_object(dest, src)==true - !*/ - - void tanh_gradient ( - tensor& grad, - const tensor& dest, - const tensor& gradient_input - ); - /*! - requires - - have_same_dimensions(dest,gradient_input) == true - - have_same_dimensions(dest,grad) == true - ensures - - Recalling that dest is the output of tanh(dest,SRC) for some SRC tensor, - let f(SRC) == dot(gradient_input,dest). Then this function computes the - gradient of f() with respect to SRC and stores it to grad. Moreover, if - is_same_object(grad,gradient_input)==true then the output is assigned to - grad, replacing its previous contents. 
Otherwise the output is added to - grad. - - This function supports in-place operation, i.e. having - is_same_object(grad, gradient_input)==true - !*/ - -// ---------------------------------------------------------------------------------------- - - void resize_bilinear ( - tensor& dest, - long dest_row_stride, - long dest_channel_stride, - const tensor& src, - long src_row_stride, - long src_channel_stride - ); - /*! - requires - - is_same_object(dest, src)==false - - dest.num_samples() == src.num_samples() - - dest.k() == src.k() - ensures - - for all valid i,k: image_plane(dest,i,k) is a copy of image_plane(src,i,k) - that has been bilinearly interpolated to fit into the shape of - image_plane(dest,i,k). - - Instead of supposing the row stride and channel stride in the tensors is - given by tensor::nc() and tensor::nr()*tensor::nc() respectively, we use the - provided stride values to transition from one row and channel to the next. - This is useful in combination with alias_tensor objects since it allows you - to operate on subwindows in an image. - !*/ - - void resize_bilinear_gradient ( - tensor& grad, - long grad_row_stride, - long grad_channel_stride, - const tensor& gradient_input, - long gradient_input_row_stride, - long gradient_input_channel_stride - ); - /*! - requires - - is_same_object(grad, gradient_input)==false - - gradient_input.num_samples() == grad.num_samples() - - gradient_input.k() == grad.k() - ensures - - Suppose that DEST is the output of resize_bilinear(DEST,SRC) for some SRC - tensor, let f(SRC) == dot(gradient_input,DEST). Then this function computes - the gradient of f() with respect to SRC and adds it to grad. It should be - noted that we don't need to know the contents of DEST to compute this - gradient. All that matters is that gradient_input have the same dimensions - as DEST. 
- - Instead of supposing the row stride and channel stride in the tensors is - given by tensor::nc() and tensor::nr()*tensor::nc() respectively, we use the - provided stride values to transition from one row and channel to the next. - This is useful in combination with alias_tensor objects since it allows you - to operate on subwindows in an image. - !*/ - - inline void resize_bilinear ( - tensor& dest, - const tensor& src - ) { resize_bilinear(dest, dest.nc(), dest.nr()*dest.nc(), src, src.nc(), src.nr()*src.nc()); } - /*! - requires - - is_same_object(dest, src)==false - - dest.num_samples() == src.num_samples() - - dest.k() == src.k() - ensures - - for all valid i,k: image_plane(dest,i,k) is a copy of image_plane(src,i,k) - that has been bilinearly interpolated to fit into the shape of - image_plane(dest,i,k). - !*/ - - inline void resize_bilinear_gradient ( - tensor& grad, - const tensor& gradient_input - ) { resize_bilinear_gradient(grad, grad.nc(), grad.nr()*grad.nc(), gradient_input, gradient_input.nc(), gradient_input.nr()*gradient_input.nc()); } - /*! - requires - - is_same_object(grad, gradient_input)==false - - gradient_input.num_samples() == grad.num_samples() - - gradient_input.k() == grad.k() - ensures - - Suppose that DEST is the output of resize_bilinear(DEST,SRC) for some SRC - tensor, let f(SRC) == dot(gradient_input,DEST). Then this function computes - the gradient of f() with respect to SRC and adds it to grad. It should be - noted that we don't need to know the contents of DEST to compute this - gradient. All that matters is that gradient_input have the same dimensions - as DEST. - !*/ - -// ---------------------------------------------------------------------------------------- - -
    class multi_device_tensor_averager
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a tool for very quickly averaging a bunch of tensors
                together.  Call set() once with the tensors to average, then call
                average() whenever they should all be replaced by their mean.
        !*/
    public:

        multi_device_tensor_averager(const multi_device_tensor_averager&) = delete;
        multi_device_tensor_averager& operator=(const multi_device_tensor_averager&) = delete;

        multi_device_tensor_averager() = default;

        void set(
            std::vector<tensor*> items
        )
        /*!
            requires
                - All the tensors in items are the same size
            ensures
                - When you call average() we will average the tensors in items.
                - It's important that the tensors already be allocated to their devices
                  before you call set().  This is because set() configures the
                  between-device transfers now and average() uses them later.
                - Any configuration from a previous call to set() is discarded.
                - If items is empty then this object is left cleared and average()
                  does nothing.
        !*/
        {
            using namespace ::dlib::cuda;

            // Forget whatever an earlier call to set() configured.
            accessible_groups.clear();
            epa.clear();
            if (items.empty())
                return;

            scale = 1.0/items.size();

            // Split items into groups of mutually accessible devices.  In each
            // round the first remaining tensor leads a new group, every other
            // remaining tensor for which can_access_peer() succeeds against the
            // leader joins that group, and the rest are retried in the next round.
            std::vector<tensor*> group, unused;
            while (!items.empty())
            {
                group.push_back(items[0]);
                for (size_t i = 1; i < items.size(); ++i)
                {
                    if (can_access_peer(*items[0], *items[i]))
                        group.push_back(items[i]);
                    else
                        unused.push_back(items[i]);
                }
                accessible_groups.push_back(group);
                unused.swap(items);
                unused.clear();
                group.clear();
            }

            // Create one enable_peer_access object pairing each group's leader
            // with each of its other members.
            for (auto&& g : accessible_groups)
            {
                for (size_t i = 1; i < g.size(); ++i)
                {
                    // Give the new object an owner *before* it reaches the vector.
                    // emplace_back(new ...) would leak it if the vector failed to
                    // grow, since the raw pointer is not yet owned at that point.
                    epa.push_back(std::unique_ptr<enable_peer_access>(
                            new enable_peer_access(*g[0], *g[i])));
                }
            }
        }

        size_t num_device_groups(
        ) const { return accessible_groups.size(); }
        /*!
            ensures
                - The devices given to set() are grouped together when they can directly
                  access each other using GPUDirect.  This function returns the number of
                  such groups.  For example, if all devices can directly access each other
                  then the number of groups is 1.
        !*/

        void average()
        /*!
            requires
                - All the devices have stopped writing to the tensors given to set().  So
                  you should probably call cudaDeviceSynchronize() on each of the relevant
                  devices before calling average().
            ensures
                - Computes the average of all the tensors given to set() and then sets them
                  all equal to the average.
        !*/
        {
            using namespace ::dlib::cuda;


            // First we average things within each group.  Each term is multiplied
            // by scale (1/total tensor count) as it is accumulated into the group
            // leader g[0], so summing the leaders afterwards gives the overall mean.
            for (auto&& g : accessible_groups)
            {
                raii_set_device set_dev(*g[0]);
                if (g.size() == 1)
                    tt::affine_transform(*g[0], *g[0], scale);
                else
                    tt::affine_transform(*g[0], *g[0], *g[1], scale, scale);

                // g[0] is already scaled, so only the newly added term is scaled.
                for (size_t i = 2; i < g.size(); ++i)
                    tt::affine_transform(*g[0], *g[0], *g[i], 1, scale);
            }

            if (accessible_groups.size() > 1)
            {
                tensor& total_avg = *accessible_groups[0][0];
                raii_set_device set_dev(total_avg);
                accum_buffer.copy_size(total_avg);
                // now we need to average things across groups
                for (size_t i = 1; i < accessible_groups.size(); ++i)
                {
                    // Stage the other group's partial result in accum_buffer
                    // before adding it to the running total.
                    memcpy(accum_buffer, *accessible_groups[i][0]);
                    tt::add(total_avg, total_avg, accum_buffer);
                }

                // Now total_avg has the final average in it.  So we need to send
                // copies of it back to each of the groups.
                for (size_t i = 1; i < accessible_groups.size(); ++i)
                {
                    memcpy(*accessible_groups[i][0], total_avg);
                }
            }


            // Now propagate averages back out to each element using point to point
            // communication inside a group.
            for (auto&& g : accessible_groups)
            {
                raii_set_device set_dev(*g[0]);
                for (size_t i = 1; i < g.size(); ++i)
                    memcpy(*g[i], *g[0]);
            }
        }

    private:
        // One entry per (group leader, group member) pair created by set().
        std::vector<std::unique_ptr<::dlib::cuda::enable_peer_access>> epa;
        // The tensors given to set(), partitioned by set(); element 0 of each
        // group is that group's leader.
        std::vector<std::vector<tensor*>> accessible_groups;
        // 1/(number of tensors given to set()).  Only read when
        // accessible_groups is non-empty, but initialized so it is never
        // indeterminate.
        float scale = 0;

        // Scratch tensor used by average() when combining results across groups.
        resizable_tensor accum_buffer;
    };
- -// ---------------------------------------------------------------------------------------- - - void copy_tensor( - bool add_to, - tensor& dest, - size_t dest_k_offset, - const tensor& src, - size_t src_k_offset, - size_t count_k - ); - /*! 
- requires - - dest.nc() == src.nc() - - dest.nr() == src.nr() - - dest.num_samples() == src.num_samples() - - dest.k() - dest_k_offset >= count_k - - src.k() - src_k_offset >= count_k - - is_same_object(dest,src) == false - - The memory areas of src and dest do not overlap. - ensures - - if (add_to) then - - performs: dest[i, k + dest_k_offset, r, c] += src[i, k + src_k_offset, r, c], where k in [0..count_k) - i.e., adds content of each sample from src into corresponding place of sample at dest. - - else - - performs: dest[i, k + dest_k_offset, r, c] = src[i, k + src_k_offset, r, c], where k in [0..count_k) - i.e., copies content of each sample from src into corresponding place of sample at dest. - !*/ - -// ---------------------------------------------------------------------------------------- - -}} - -#ifdef NO_MAKEFILE -#include "tensor_tools.cpp" -#endif - -#endif // DLIB_TeNSOR_TOOLS_H_ - - |