Diffstat (limited to 'ml/dlib/dlib/dnn/cudnn_dlibapi.cpp')
-rw-r--r--  ml/dlib/dlib/dnn/cudnn_dlibapi.cpp | 1604 ------------------------
 1 file changed, 0 insertions(+), 1604 deletions(-)
diff --git a/ml/dlib/dlib/dnn/cudnn_dlibapi.cpp b/ml/dlib/dlib/dnn/cudnn_dlibapi.cpp
deleted file mode 100644
index 6926561f1..000000000
--- a/ml/dlib/dlib/dnn/cudnn_dlibapi.cpp
+++ /dev/null
@@ -1,1604 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_DNN_CuDNN_CPP_
-#define DLIB_DNN_CuDNN_CPP_
-
-#ifdef DLIB_USE_CUDA
-
-#include "cudnn_dlibapi.h"
-#include "tensor.h"
-#include <cudnn.h>
-#include <iostream>
-#include <string>
-#include <vector>
-#include "cuda_utils.h"
-#include "cpu_dlib.h"
-#include "cuda_dlib.h"
-#include "tensor_tools.h"
-
-static const char* cudnn_get_error_string(cudnnStatus_t s)
-{
- switch(s)
- {
- case CUDNN_STATUS_NOT_INITIALIZED:
- return "CUDA Runtime API initialization failed.";
- case CUDNN_STATUS_ALLOC_FAILED:
- return "CUDA Resources could not be allocated.";
- case CUDNN_STATUS_BAD_PARAM:
- return "CUDNN_STATUS_BAD_PARAM";
- case CUDNN_STATUS_EXECUTION_FAILED:
- return "CUDNN_STATUS_EXECUTION_FAILED";
- case CUDNN_STATUS_NOT_SUPPORTED:
- return "CUDNN_STATUS_NOT_SUPPORTED";
- case CUDNN_STATUS_ARCH_MISMATCH:
- return "CUDNN_STATUS_ARCH_MISMATCH: Your GPU is too old and not supported by cuDNN";
- default:
- return "A call to cuDNN failed";
- }
-}
-
-// Check the return value of a call to the cuDNN runtime for an error condition.
-#define CHECK_CUDNN(call) \
-do{ \
- const cudnnStatus_t error = call; \
- if (error != CUDNN_STATUS_SUCCESS) \
- { \
- std::ostringstream sout; \
- sout << "Error while calling " << #call << " in file " << __FILE__ << ":" << __LINE__ << ". ";\
- sout << "code: " << error << ", reason: " << cudnn_get_error_string(error);\
- throw dlib::cudnn_error(sout.str()); \
- } \
-}while(false)
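// (Illustrative usage sketch; not part of the deleted file.) Every cuDNN
// call in this file goes through CHECK_CUDNN, so any non-success
// cudnnStatus_t surfaces as a thrown dlib::cudnn_error naming the failing
// call and its location:
//
//     cudnnTensorDescriptor_t desc;
//     CHECK_CUDNN(cudnnCreateTensorDescriptor(&desc));   // throws on failure
//     CHECK_CUDNN(cudnnDestroyTensorDescriptor(desc));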
-
-
-namespace dlib
-{
-
- namespace cuda
- {
-
- // ------------------------------------------------------------------------------------
-
- static cudnnTensorDescriptor_t descriptor(const tensor& t)
- {
- return (const cudnnTensorDescriptor_t)t.get_cudnn_tensor_descriptor().get_handle();
- }
- static cudnnTensorDescriptor_t descriptor(const tensor_descriptor& t)
- {
- return (const cudnnTensorDescriptor_t)t.get_handle();
- }
-
- // ------------------------------------------------------------------------------------
-
- class cudnn_context
- {
- public:
- // not copyable
- cudnn_context(const cudnn_context&) = delete;
- cudnn_context& operator=(const cudnn_context&) = delete;
-
- cudnn_context()
- {
- handles.resize(16);
- }
- ~cudnn_context()
- {
- for (auto h : handles)
- {
- if (h)
- cudnnDestroy(h);
- }
- }
-
- cudnnHandle_t get_handle (
- )
- {
- int new_device_id;
- CHECK_CUDA(cudaGetDevice(&new_device_id));
- // make room for more devices if needed
- if (new_device_id >= (long)handles.size())
- handles.resize(new_device_id+16);
-
- // If we don't have a handle already for this device then make one
- if (!handles[new_device_id])
- CHECK_CUDNN(cudnnCreate(&handles[new_device_id]));
-
- // Finally, return the handle for the current device
- return handles[new_device_id];
- }
-
- private:
-
- std::vector<cudnnHandle_t> handles;
- };
-
- static cudnnHandle_t context()
- {
- thread_local cudnn_context c;
- return c.get_handle();
- }
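// (Illustrative sketch; not part of the deleted file.) Because cudnn_context
// is thread_local and indexed by device id, context() yields one lazily
// created cudnnHandle_t per (thread, device) pair, so handles are never
// shared across threads:
//
//     CHECK_CUDA(cudaSetDevice(0));
//     cudnnHandle_t h0 = context();   // handle owned by this thread, device 0
//     CHECK_CUDA(cudaSetDevice(1));
//     cudnnHandle_t h1 = context();   // a different handle, bound to device 1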
- // ------------------------------------------------------------------------------------
-
- class cudnn_device_buffer
- {
- public:
- // not copyable
- cudnn_device_buffer(const cudnn_device_buffer&) = delete;
- cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
-
- cudnn_device_buffer()
- {
- buffers.resize(16);
- }
- ~cudnn_device_buffer()
- {
- }
-
- std::shared_ptr<resizable_cuda_buffer> get_buffer (
- )
- {
- int new_device_id;
- CHECK_CUDA(cudaGetDevice(&new_device_id));
- // make room for more devices if needed
- if (new_device_id >= (long)buffers.size())
- buffers.resize(new_device_id+16);
-
- // If we don't have a buffer already for this device then make one
- std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
- if (!buff)
- {
- buff = std::make_shared<resizable_cuda_buffer>();
- buffers[new_device_id] = buff;
- }
-
- // Finally, return the buffer for the current device
- return buff;
- }
-
- private:
-
- std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
- };
-
-
- static std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
- {
- thread_local cudnn_device_buffer buffer;
- return buffer.get_buffer();
- }
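// (Illustrative sketch; not part of the deleted file.) Unlike the handle
// cache above, this cache stores weak_ptrs, so a device's scratch buffer is
// freed as soon as the last operation drops its shared_ptr:
//
//     auto ws = device_global_buffer();   // shared_ptr keeps the buffer alive
//     auto mem = ws->get(bytes_needed);   // grows the allocation only if needed
//     // ... keep ws (and mem) in scope until the async kernel has finished ...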
- // ------------------------------------------------------------------------------------
-
- class cudnn_activation_descriptor
- {
- public:
- // not copyable
- cudnn_activation_descriptor(const cudnn_activation_descriptor&) = delete;
- cudnn_activation_descriptor& operator=(const cudnn_activation_descriptor&) = delete;
-
- cudnn_activation_descriptor(
- cudnnActivationMode_t mode,
- cudnnNanPropagation_t reluNanOpt,
- double reluCeiling
- )
- {
- CHECK_CUDNN(cudnnCreateActivationDescriptor(&handle));
- CHECK_CUDNN(cudnnSetActivationDescriptor(handle, mode, reluNanOpt, reluCeiling));
- }
-
- ~cudnn_activation_descriptor()
- {
- cudnnDestroyActivationDescriptor(handle);
- }
-
- cudnnActivationDescriptor_t get_handle (
- )
- {
- return handle;
- }
- private:
- cudnnActivationDescriptor_t handle;
- };
-
- static cudnnActivationDescriptor_t relu_activation_descriptor()
- {
- thread_local cudnn_activation_descriptor des(CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN,0);
- return des.get_handle();
- }
-
- static cudnnActivationDescriptor_t sigmoid_activation_descriptor()
- {
- thread_local cudnn_activation_descriptor des(CUDNN_ACTIVATION_SIGMOID, CUDNN_PROPAGATE_NAN,0);
- return des.get_handle();
- }
-
- static cudnnActivationDescriptor_t tanh_activation_descriptor()
- {
- thread_local cudnn_activation_descriptor des(CUDNN_ACTIVATION_TANH, CUDNN_PROPAGATE_NAN,0);
- return des.get_handle();
- }
-
- // ------------------------------------------------------------------------------------
-
- tensor_descriptor::
- tensor_descriptor(
- ) : handle(nullptr)
- {
- }
-
- tensor_descriptor::
- ~tensor_descriptor()
- {
- set_size(0,0,0,0);
- }
-
- void tensor_descriptor::
- set_size(
- int n,
- int k,
- int nr,
- int nc
- )
- {
- if (handle)
- {
- cudnnDestroyTensorDescriptor((cudnnTensorDescriptor_t)handle);
- handle = nullptr;
- }
-
- if (n != 0 && nr != 0 && nc != 0 && k != 0)
- {
- cudnnTensorDescriptor_t h;
- CHECK_CUDNN(cudnnCreateTensorDescriptor(&h));
- handle = h;
-
- CHECK_CUDNN(cudnnSetTensor4dDescriptor((cudnnTensorDescriptor_t)handle,
- CUDNN_TENSOR_NCHW,
- CUDNN_DATA_FLOAT,
- n,
- k,
- nr,
- nc));
- }
- }
-
- void tensor_descriptor::
- get_size (
- int& n,
- int& k,
- int& nr,
- int& nc
- ) const
- {
- if (handle)
- {
- int nStride, cStride, hStride, wStride;
- cudnnDataType_t datatype;
- CHECK_CUDNN(cudnnGetTensor4dDescriptor((cudnnTensorDescriptor_t)handle,
- &datatype,
- &n,
- &k,
- &nr,
- &nc,
- &nStride,
- &cStride,
- &hStride,
- &wStride));
- }
- else
- {
- n = 0;
- k = 0;
- nr = 0;
- nc = 0;
- }
- }
-
- // ------------------------------------------------------------------------------------
-
- void add(
- float beta,
- tensor& dest,
- float alpha,
- const tensor& src
- )
- {
- DLIB_CASSERT(
- (have_same_dimensions(src, dest) ||
- (src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1) ||
- (src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
- (src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
- (src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)) &&
- is_same_object(src,dest) == false ,
- "\n\t dest.num_samples(): " << dest.num_samples()
- <<"\n\t dest.k(): " << dest.k()
- <<"\n\t dest.nr(): " << dest.nr()
- <<"\n\t dest.nc(): " << dest.nc()
- <<"\n\t src.num_samples(): " << src.num_samples()
- <<"\n\t src.k(): " << src.k()
- <<"\n\t src.nr(): " << src.nr()
- <<"\n\t src.nc(): " << src.nc()
- );
-
- if (dest.size() == src.size() && beta == 1)
- {
- // Call the dlib function in this case since it's faster than the one that
- // comes with cuDNN (at least as of cuDNN v4).
- add_scaled(dest, alpha, src);
- return;
- }
- else if (src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)
- {
- add_cv_to_all_columns(beta, dest, alpha, src);
- return;
- }
-
- CHECK_CUDNN(cudnnAddTensor(context(),
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
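// (Illustrative sketch; not part of the deleted file.) add() computes
// dest = beta*dest + alpha*src, where src may have any of the broadcastable
// shapes listed in the assert above. The classic use is a per-channel bias:
//
//     resizable_tensor dest(32, 16, 7, 7);   // n=32, k=16, nr=7, nc=7
//     resizable_tensor bias(1, 16, 1, 1);    // one value per channel
//     add(1, dest, 1, bias);                 // adds bias[k] to every element
//                                            // of channel k in every sample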
-
- void assign_conv_bias_gradient (
- tensor& grad,
- const tensor& gradient_input
- )
- {
- DLIB_CASSERT(
- grad.num_samples() == 1 &&
- grad.k() >= 1 &&
- grad.nr() == 1 &&
- grad.nc() == 1 &&
- gradient_input.k() == grad.k() &&
- gradient_input.size() > 0 &&
- is_same_object(grad,gradient_input) == false
- );
-
- const float alpha = 1;
- const float beta = 0;
- CHECK_CUDNN(cudnnConvolutionBackwardBias(context(),
- &alpha,
- descriptor(gradient_input),
- gradient_input.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
-
- // ------------------------------------------------------------------------------------
-
- void batch_normalize_inference (
- const double eps,
- resizable_tensor& dest,
- const tensor& src,
- const tensor& gamma,
- const tensor& beta,
- const tensor& running_means,
- const tensor& running_variances
- )
- {
- DLIB_CASSERT(
- gamma.num_samples() == 1 &&
- gamma.nr() == src.nr() &&
- gamma.nc() == src.nc() &&
- gamma.k() == src.k() &&
- have_same_dimensions(gamma, beta) &&
- have_same_dimensions(gamma, running_means) &&
- have_same_dimensions(gamma, running_variances) &&
- eps > 0,
- "\ngamma.num_samples(): " << gamma.num_samples() <<
- "\ngamma.k(): " << gamma.k() <<
- "\ngamma.nr(): " << gamma.nr() <<
- "\ngamma.nc(): " << gamma.nc() <<
- "\nbeta.num_samples(): " << beta.num_samples() <<
- "\nbeta.k(): " << beta.k() <<
- "\nbeta.nr(): " << beta.nr() <<
- "\nbeta.nc(): " << beta.nc() <<
- "\nrunning_means.num_samples(): " << running_means.num_samples() <<
- "\nrunning_means.k(): " << running_means.k() <<
- "\nrunning_means.nr(): " << running_means.nr() <<
- "\nrunning_means.nc(): " << running_means.nc() <<
- "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
- "\nrunning_variances.k(): " << running_variances.k() <<
- "\nrunning_variances.nr(): " << running_variances.nr() <<
- "\nrunning_variances.nc(): " << running_variances.nc() <<
- "\nsrc.k(): " << src.k() <<
- "\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc() <<
- "\neps: " << eps
- );
- const float in_scale = 1;
- const float out_scale = 0;
-
- dest.copy_size(src);
-
- CHECK_CUDNN(cudnnBatchNormalizationForwardInference(
- context(),
- CUDNN_BATCHNORM_PER_ACTIVATION,
- &in_scale,
- &out_scale,
- descriptor(src),
- src.device(),
- descriptor(dest),
- dest.device(),
- descriptor(gamma),
- gamma.device(),
- beta.device(),
- running_means.device(),
- running_variances.device(),
- eps));
- }
-
- void batch_normalize (
- const double eps,
- resizable_tensor& dest,
- resizable_tensor& means,
- resizable_tensor& invstds,
- const double averaging_factor,
- resizable_tensor& running_means,
- resizable_tensor& running_variances,
- const tensor& src,
- const tensor& gamma,
- const tensor& beta
- )
- {
- DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
- DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means));
- DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds));
- DLIB_CASSERT(
- src.num_samples() > 1 &&
- gamma.num_samples() == 1 &&
- beta.num_samples() == 1 &&
- gamma.nr() == beta.nr() && beta.nr() == src.nr() &&
- gamma.nc() == beta.nc() && beta.nc() == src.nc() &&
- gamma.k() == beta.k() && beta.k() == src.k() &&
- eps > 0,
- "\ngamma.num_samples(): " << gamma.num_samples() <<
- "\ngamma.k(): " << gamma.k() <<
- "\ngamma.nr(): " << gamma.nr() <<
- "\ngamma.nc(): " << gamma.nc() <<
- "\nbeta.num_samples(): " << beta.num_samples() <<
- "\nbeta.k(): " << beta.k() <<
- "\nbeta.nr(): " << beta.nr() <<
- "\nbeta.nc(): " << beta.nc() <<
- "\nsrc.k(): " << src.k() <<
- "\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc() <<
- "\neps: " << eps
- );
-
- const float in_scale = 1;
- const float out_scale = 0;
-
- dest.copy_size(src);
- means.set_size(1, src.k(), src.nr(), src.nc());
- invstds.copy_size(means);
- running_means.copy_size(means);
- running_variances.copy_size(means);
- // cuDNN requires that running_means and running_variances be initialized to
- // some valid float values even if the averaging factor would have ignored
- // them.
- if (averaging_factor == 1)
- {
- running_means = 0;
- running_variances = 1;
- }
-
- CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
- context(),
- CUDNN_BATCHNORM_PER_ACTIVATION,
- &in_scale,
- &out_scale,
- descriptor(src),
- src.device(),
- descriptor(dest),
- dest.device(),
- descriptor(gamma),
- gamma.device(),
- beta.device(),
- averaging_factor,
- running_means.device(),
- running_variances.device(),
- eps,
- means.device(),
- invstds.device()));
- }
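// (Worked note; not part of the deleted file.) averaging_factor is passed to
// cuDNN as the exponential average factor, so with f = averaging_factor each
// training call updates the running statistics as
//
//     running_means     = (1-f)*running_means     + f*batch_means
//     running_variances = (1-f)*running_variances + f*batch_variances
//
// which is why f==1 simply overwrites them with the current batch statistics,
// and why cuDNN still requires them to hold valid floats beforehand.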
-
- void batch_normalize_gradient(
- const double eps,
- const tensor& gradient_input,
- const tensor& means,
- const tensor& invstds,
- const tensor& src,
- const tensor& gamma,
- tensor& src_grad,
- tensor& gamma_grad,
- tensor& beta_grad
- )
- {
- const long num = src.k()*src.nr()*src.nc();
- DLIB_CASSERT(src.num_samples() > 1);
- DLIB_CASSERT(num == (long)means.size());
- DLIB_CASSERT(num == (long)invstds.size());
- DLIB_CASSERT(num == (long)gamma.size());
- DLIB_CASSERT(num == (long)gamma_grad.size());
- DLIB_CASSERT(num == (long)beta_grad.size());
- DLIB_CASSERT(have_same_dimensions(gradient_input, src));
- DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad));
- DLIB_CASSERT(eps > 0);
-
- const float in_scale = 1;
- const float out_scale = 1;
- const float in_scale_params = 1;
- const float out_scale_params = 0;
-
- CHECK_CUDNN(cudnnBatchNormalizationBackward(
- context(),
- CUDNN_BATCHNORM_PER_ACTIVATION,
- &in_scale,
- &out_scale,
- &in_scale_params,
- &out_scale_params,
- descriptor(src),
- src.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- descriptor(src_grad),
- src_grad.device(),
- descriptor(gamma),
- gamma.device(),
- gamma_grad.device(),
- beta_grad.device(),
- eps,
- means.device(),
- invstds.device()));
- }
-
- // ------------------------------------------------------------------------------------
-
- void batch_normalize_conv_inference (
- const double eps,
- resizable_tensor& dest,
- const tensor& src,
- const tensor& gamma,
- const tensor& beta,
- const tensor& running_means,
- const tensor& running_variances
- )
- {
- DLIB_CASSERT(
- gamma.num_samples() == 1 &&
- gamma.nr() == 1 &&
- gamma.nc() == 1 &&
- gamma.k() == src.k() &&
- have_same_dimensions(gamma, beta) &&
- have_same_dimensions(gamma, running_means) &&
- have_same_dimensions(gamma, running_variances) &&
- eps > 0,
- "\ngamma.num_samples(): " << gamma.num_samples() <<
- "\ngamma.k(): " << gamma.k() <<
- "\ngamma.nr(): " << gamma.nr() <<
- "\ngamma.nc(): " << gamma.nc() <<
- "\nbeta.num_samples(): " << beta.num_samples() <<
- "\nbeta.k(): " << beta.k() <<
- "\nbeta.nr(): " << beta.nr() <<
- "\nbeta.nc(): " << beta.nc() <<
- "\nrunning_means.num_samples(): " << running_means.num_samples() <<
- "\nrunning_means.k(): " << running_means.k() <<
- "\nrunning_means.nr(): " << running_means.nr() <<
- "\nrunning_means.nc(): " << running_means.nc() <<
- "\nrunning_variances.num_samples(): " << running_variances.num_samples() <<
- "\nrunning_variances.k(): " << running_variances.k() <<
- "\nrunning_variances.nr(): " << running_variances.nr() <<
- "\nrunning_variances.nc(): " << running_variances.nc() <<
- "\nsrc.k(): " << src.k() <<
- "\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc() <<
- "\neps: " << eps
- );
- const float in_scale = 1;
- const float out_scale = 0;
-
- dest.copy_size(src);
-
- CHECK_CUDNN(cudnnBatchNormalizationForwardInference(
- context(),
- CUDNN_BATCHNORM_SPATIAL,
- &in_scale,
- &out_scale,
- descriptor(src),
- src.device(),
- descriptor(dest),
- dest.device(),
- descriptor(gamma),
- gamma.device(),
- beta.device(),
- running_means.device(),
- running_variances.device(),
- eps));
- }
-
- void batch_normalize_conv (
- const double eps,
- resizable_tensor& dest,
- resizable_tensor& means,
- resizable_tensor& invstds,
- const double averaging_factor,
- resizable_tensor& running_means,
- resizable_tensor& running_variances,
- const tensor& src,
- const tensor& gamma,
- const tensor& beta
- )
- {
- DLIB_CASSERT(0 <= averaging_factor && averaging_factor <= 1, "averaging_factor: " << averaging_factor);
- DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_means,means));
- DLIB_CASSERT(averaging_factor==1 || have_same_dimensions(running_variances,invstds));
- DLIB_CASSERT(
- src.num_samples() > 1 &&
- gamma.num_samples() == 1 &&
- beta.num_samples() == 1 &&
- gamma.nr() == 1 &&
- beta.nr() == 1 &&
- gamma.nc() == 1 &&
- beta.nc() == 1 &&
- gamma.k() == beta.k() && beta.k() == src.k() &&
- eps > 0,
- "\ngamma.num_samples(): " << gamma.num_samples() <<
- "\ngamma.k(): " << gamma.k() <<
- "\ngamma.nr(): " << gamma.nr() <<
- "\ngamma.nc(): " << gamma.nc() <<
- "\nbeta.num_samples(): " << beta.num_samples() <<
- "\nbeta.k(): " << beta.k() <<
- "\nbeta.nr(): " << beta.nr() <<
- "\nbeta.nc(): " << beta.nc() <<
- "\nsrc.k(): " << src.k() <<
- "\nsrc.nr(): " << src.nr() <<
- "\nsrc.nc(): " << src.nc() <<
- "\neps: " << eps
- );
- const float in_scale = 1;
- const float out_scale = 0;
-
- dest.copy_size(src);
- means.set_size(1, src.k());
- invstds.copy_size(means);
- running_means.copy_size(means);
- running_variances.copy_size(means);
- // cuDNN requires that running_means and running_variances be initialized to
- // some valid float values even if the averaging factor would have ignored
- // them.
- if (averaging_factor == 1)
- {
- running_means = 0;
- running_variances = 1;
- }
-
- CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(
- context(),
- CUDNN_BATCHNORM_SPATIAL,
- &in_scale,
- &out_scale,
- descriptor(src),
- src.device(),
- descriptor(dest),
- dest.device(),
- descriptor(gamma),
- gamma.device(),
- beta.device(),
- averaging_factor,
- running_means.device(),
- running_variances.device(),
- eps,
- means.device(),
- invstds.device()));
- }
-
- void batch_normalize_conv_gradient(
- const double eps,
- const tensor& gradient_input,
- const tensor& means,
- const tensor& invstds,
- const tensor& src,
- const tensor& gamma,
- tensor& src_grad,
- tensor& gamma_grad,
- tensor& beta_grad
- )
- {
- DLIB_CASSERT(src.k() == (long)means.size());
- DLIB_CASSERT(src.k() == (long)invstds.size());
- DLIB_CASSERT(src.k() == (long)gamma.size());
- DLIB_CASSERT(src.k() == (long)gamma_grad.size());
- DLIB_CASSERT(src.k() == (long)beta_grad.size());
- DLIB_CASSERT(have_same_dimensions(gradient_input, src));
- DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad));
- DLIB_CASSERT(eps > 0);
-
- const float in_scale = 1;
- const float out_scale = 1;
- const float in_scale_params = 1;
- const float out_scale_params = 0;
-
- CHECK_CUDNN(cudnnBatchNormalizationBackward(
- context(),
- CUDNN_BATCHNORM_SPATIAL,
- &in_scale,
- &out_scale,
- &in_scale_params,
- &out_scale_params,
- descriptor(src),
- src.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- descriptor(src_grad),
- src_grad.device(),
- descriptor(gamma),
- gamma.device(),
- gamma_grad.device(),
- beta_grad.device(),
- eps,
- means.device(),
- invstds.device()));
- }
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- tensor_conv::
- tensor_conv(
- ) :
- filter_handle(nullptr),
- conv_handle(nullptr),
- forward_algo(0),
- backward_data_algo(0),
- backward_filters_algo(0)
- {
- clear();
- }
-
- void tensor_conv::
- clear (
- )
- {
- if (filter_handle)
- cudnnDestroyFilterDescriptor((cudnnFilterDescriptor_t)filter_handle);
- if (conv_handle)
- cudnnDestroyConvolutionDescriptor((cudnnConvolutionDescriptor_t)conv_handle);
- filter_handle = nullptr;
- conv_handle = nullptr;
- out_num_samples = 0;
- out_k = 0;
- out_nr = 0;
- out_nc = 0;
-
- stride_y = 0;
- stride_x = 0;
- padding_y = 0;
- padding_x = 0;
- data_num_samples = 0;
- data_k = 0;
- data_nr = 0;
- data_nc = 0;
- filters_num_samples = 0;
- filters_k = 0;
- filters_nr = 0;
- filters_nc = 0;
-
- forward_algo = 0;
- backward_data_algo = 0;
- backward_filters_algo = 0;
-
- forward_workspace_size_in_bytes = 0;
- backward_data_workspace_size_in_bytes = 0;
- backward_filters_workspace_size_in_bytes = 0;
-
- forward_workspace.reset();
- backward_data_workspace.reset();
- backward_filters_workspace.reset();
- workspace.reset();
- }
-
- void tensor_conv::
- setup(
- const tensor& data,
- const tensor& filters,
- int stride_y_,
- int stride_x_,
- int padding_y_,
- int padding_x_
- )
- {
- DLIB_CASSERT(data.k() == filters.k());
-
- // if the last call to setup gave the same exact settings then don't do
- // anything.
- if (stride_y_ == stride_y &&
- stride_x_ == stride_x &&
- padding_y_ == padding_y &&
- padding_x_ == padding_x &&
- data_num_samples == data.num_samples() &&
- data_k == data.k() &&
- data_nr == data.nr() &&
- data_nc == data.nc() &&
- filters_num_samples == filters.num_samples() &&
- filters_k == filters.k() &&
- filters_nr == filters.nr() &&
- filters_nc == filters.nc())
- {
- return;
- }
-
- clear();
- try
- {
- stride_y = stride_y_;
- stride_x = stride_x_;
- padding_y = padding_y_;
- padding_x = padding_x_;
- data_num_samples = data.num_samples();
- data_k = data.k();
- data_nr = data.nr();
- data_nc = data.nc();
- filters_num_samples = filters.num_samples();
- filters_k = filters.k();
- filters_nr = filters.nr();
- filters_nc = filters.nc();
-
- CHECK_CUDNN(cudnnCreateFilterDescriptor((cudnnFilterDescriptor_t*)&filter_handle));
- CHECK_CUDNN(cudnnSetFilter4dDescriptor((cudnnFilterDescriptor_t)filter_handle,
- CUDNN_DATA_FLOAT,
- CUDNN_TENSOR_NCHW,
- filters.num_samples(),
- filters.k(),
- filters.nr(),
- filters.nc()));
-
- CHECK_CUDNN(cudnnCreateConvolutionDescriptor((cudnnConvolutionDescriptor_t*)&conv_handle));
-#if CUDNN_MAJOR >= 6
- CHECK_CUDNN(cudnnSetConvolution2dDescriptor((cudnnConvolutionDescriptor_t)conv_handle,
- padding_y, // vertical padding
- padding_x, // horizontal padding
- stride_y,
- stride_x,
- 1, 1, // must be 1,1
- CUDNN_CROSS_CORRELATION,
- CUDNN_DATA_FLOAT)); // could also be CUDNN_CONVOLUTION
-#else
- CHECK_CUDNN(cudnnSetConvolution2dDescriptor((cudnnConvolutionDescriptor_t)conv_handle,
- padding_y, // vertical padding
- padding_x, // horizontal padding
- stride_y,
- stride_x,
- 1, 1, // must be 1,1
- CUDNN_CROSS_CORRELATION)); // could also be CUDNN_CONVOLUTION
-#endif
-
- CHECK_CUDNN(cudnnGetConvolution2dForwardOutputDim(
- (const cudnnConvolutionDescriptor_t)conv_handle,
- descriptor(data),
- (const cudnnFilterDescriptor_t)filter_handle,
- &out_num_samples,
- &out_k,
- &out_nr,
- &out_nc));
-
- tensor_descriptor dest_desc;
- dest_desc.set_size(out_num_samples,out_k,out_nr,out_nc);
-
- // Pick which forward algorithm we will use and allocate the necessary
- // workspace buffer.
- cudnnConvolutionFwdAlgo_t forward_best_algo;
- CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(
- context(),
- descriptor(data),
- (const cudnnFilterDescriptor_t)filter_handle,
- (const cudnnConvolutionDescriptor_t)conv_handle,
- descriptor(dest_desc),
- dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_FWD_PREFER_FASTEST:CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
- std::numeric_limits<size_t>::max(),
- &forward_best_algo));
- forward_algo = forward_best_algo;
- CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
- context(),
- descriptor(data),
- (const cudnnFilterDescriptor_t)filter_handle,
- (const cudnnConvolutionDescriptor_t)conv_handle,
- descriptor(dest_desc),
- forward_best_algo,
- &forward_workspace_size_in_bytes));
-
- // Pick which backward data algorithm we will use and allocate the
- // necessary workspace buffer.
- cudnnConvolutionBwdDataAlgo_t backward_data_best_algo;
- CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(
- context(),
- (const cudnnFilterDescriptor_t)filter_handle,
- descriptor(dest_desc),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- descriptor(data),
- dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE,
- std::numeric_limits<size_t>::max(),
- &backward_data_best_algo));
- backward_data_algo = backward_data_best_algo;
-
- CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
- context(),
- (const cudnnFilterDescriptor_t)filter_handle,
- descriptor(dest_desc),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- descriptor(data),
- backward_data_best_algo,
- &backward_data_workspace_size_in_bytes));
-
- // Pick which backward filters algorithm we will use and allocate the
- // necessary workspace buffer.
- cudnnConvolutionBwdFilterAlgo_t backward_filters_best_algo;
- CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(
- context(),
- descriptor(data),
- descriptor(dest_desc),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- (const cudnnFilterDescriptor_t)filter_handle,
- dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE,
- std::numeric_limits<size_t>::max(),
- &backward_filters_best_algo));
- // cuDNN 5.1 has a bug that causes
- // cudnnGetConvolutionBackwardFilterAlgorithm() to pick the winograd
- // algorithm even for cases where cuDNN doesn't support it, leading to
- // incorrect outputs. So here we check if we are in a case where winograd
- // isn't supported and manually overrule
- // cudnnGetConvolutionBackwardFilterAlgorithm() by picking a safe
- // algorithm.
- if (dnn_prefer_fastest_algorithms() &&
- !(stride_x == 1 && stride_y == 1 && ((filters_nr==3&&filters_nc==3) || (filters_nr==5&&filters_nc==5)))
- )
- {
- backward_filters_best_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
- }
- backward_filters_algo = backward_filters_best_algo;
-
- CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
- context(),
- descriptor(data),
- descriptor(dest_desc),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- (const cudnnFilterDescriptor_t)filter_handle,
- backward_filters_best_algo,
- &backward_filters_workspace_size_in_bytes));
-
- workspace = device_global_buffer();
- }
- catch(...)
- {
- clear();
- throw;
- }
- }
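// (Illustrative sketch; not part of the deleted file.) Each of the three
// passes above follows the same two-step recipe, shown here for the forward
// pass with hypothetical descriptor names:
//
//     cudnnConvolutionFwdAlgo_t algo;
//     CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(        // 1. pick an algorithm
//         context(), data_desc, filt_desc, conv_desc, out_desc,
//         CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
//         std::numeric_limits<size_t>::max(), &algo));
//     size_t ws_bytes;
//     CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(    // 2. size its workspace
//         context(), data_desc, filt_desc, conv_desc, out_desc, algo, &ws_bytes));
//
// The shared per-device buffer is then grown to ws_bytes at call time rather
// than each layer owning a separate workspace.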
-
- tensor_conv::
- ~tensor_conv (
- )
- {
- clear();
- }
-
- void tensor_conv::operator() (
- const bool add_to_output,
- resizable_tensor& output,
- const tensor& data,
- const tensor& filters
- )
- {
- DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
-
- output.set_size(out_num_samples, out_k, out_nr, out_nc);
- (*this)(add_to_output, static_cast<tensor&>(output), data, filters);
- }
-
- void tensor_conv::operator() (
- const bool add_to_output,
- tensor& output,
- const tensor& data,
- const tensor& filters
- )
- {
- DLIB_CASSERT(is_same_object(output,data) == false);
- DLIB_CASSERT(is_same_object(output,filters) == false);
- DLIB_CASSERT(filters.k() == data.k());
- DLIB_CASSERT(stride_y > 0 && stride_x > 0, "You must call setup() before calling this function");
- DLIB_CASSERT(filters.nc() <= data.nc() + 2*padding_x,
- "Filter windows must be small enough to fit into the padded image."
- << "\n\t filters.nc(): " << filters.nc()
- << "\n\t data.nc(): " << data.nc()
- << "\n\t padding_x: " << padding_x
- );
- DLIB_CASSERT(filters.nr() <= data.nr() + 2*padding_y,
- "Filter windows must be small enough to fit into the padded image."
- << "\n\t filters.nr(): " << filters.nr()
- << "\n\t data.nr(): " << data.nr()
- << "\n\t padding_y: " << padding_y
- );
-
-
- DLIB_CASSERT(output.num_samples() == data.num_samples(),out_num_samples << " " << data.num_samples());
- DLIB_CASSERT(output.k() == filters.num_samples());
- DLIB_CASSERT(output.nr() == 1+(data.nr()+2*padding_y-filters.nr())/stride_y);
- DLIB_CASSERT(output.nc() == 1+(data.nc()+2*padding_x-filters.nc())/stride_x);
-
-
-
- const float alpha = 1;
- const float beta = add_to_output ? 1 : 0;
-
- // Since cudnnConvolutionForward() is an asynchronous call, we need to hold a
- // reference to the workspace buffer so we can be sure it isn't reallocated
- // while the function is still executing on the device. But each time we come
- // here, we make sure to grab the latest workspace buffer so that, globally, we
- // minimize the number of such buffers.
- forward_workspace = workspace->get(forward_workspace_size_in_bytes);
-
- CHECK_CUDNN(cudnnConvolutionForward(
- context(),
- &alpha,
- descriptor(data),
- data.device(),
- (const cudnnFilterDescriptor_t)filter_handle,
- filters.device(),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- (cudnnConvolutionFwdAlgo_t)forward_algo,
- forward_workspace,
- forward_workspace_size_in_bytes,
- &beta,
- descriptor(output),
- output.device()));
- }
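// (Worked example; not part of the deleted file.) The output shape asserted
// above follows out = 1 + (in + 2*pad - filter)/stride. For instance, a
// 28x28 input with a 5x5 filter, stride 2 and padding 1 yields
// 1 + (28 + 2*1 - 5)/2 = 13 output rows and columns.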
-
- void tensor_conv::get_gradient_for_data (
- const bool add_to_output,
- const tensor& gradient_input,
- const tensor& filters,
- tensor& data_gradient
- )
- {
- const float alpha = 1;
- const float beta = add_to_output ? 1 : 0;
-
- // Since cudnnConvolutionBackwardData() is an asynchronous call, we need to hold a
- // reference to the workspace buffer so we can be sure it isn't reallocated
- // while the function is still executing on the device. But each time we come
- // here, we make sure to grab the latest workspace buffer so that, globally, we
- // minimize the number of such buffers.
- backward_data_workspace = workspace->get(backward_data_workspace_size_in_bytes);
-
-
- CHECK_CUDNN(cudnnConvolutionBackwardData(context(),
- &alpha,
- (const cudnnFilterDescriptor_t)filter_handle,
- filters.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- (cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
- backward_data_workspace,
- backward_data_workspace_size_in_bytes,
- &beta,
- descriptor(data_gradient),
- data_gradient.device()));
- }
-
- void tensor_conv::
- get_gradient_for_filters (
- const bool add_to_output,
- const tensor& gradient_input,
- const tensor& data,
- tensor& filters_gradient
- )
- {
- const float alpha = 1;
- const float beta = add_to_output ? 1 : 0;
-
- // Since cudnnConvolutionBackwardFilter() is an asynchronous call, we need to hold a
- // reference to the workspace buffer so we can be sure it isn't reallocated
- // while the function is still executing on the device. But each time we come
- // here, we make sure to grab the latest workspace buffer so that, globally, we
- // minimize the number of such buffers.
- backward_filters_workspace = workspace->get(backward_filters_workspace_size_in_bytes);
-
- CHECK_CUDNN(cudnnConvolutionBackwardFilter(context(),
- &alpha,
- descriptor(data),
- data.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- (const cudnnConvolutionDescriptor_t)conv_handle,
- (cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
- backward_filters_workspace,
- backward_filters_workspace_size_in_bytes,
- &beta,
- (const cudnnFilterDescriptor_t)filter_handle,
- filters_gradient.device()));
- }
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- pooling::pooling (
- ) : handle(nullptr),window_height(0),window_width(0),stride_y(0),stride_x(0),padding_y(0), padding_x(0)
- {
- }
-
- pooling::~pooling(
- )
- {
- clear();
- }
-
- void pooling::
- clear(
- )
- {
- if (handle)
- cudnnDestroyPoolingDescriptor((cudnnPoolingDescriptor_t)handle);
- handle = nullptr;
- window_height = 0;
- window_width = 0;
- stride_y = 0;
- stride_x = 0;
- padding_y = 0;
- padding_x = 0;
- }
-
- void pooling::
- setup_max_pooling(
- int window_height_,
- int window_width_,
- int stride_y_,
- int stride_x_,
- int padding_y_,
- int padding_x_
- )
- {
- setup(window_height_, window_width_, stride_y_, stride_x_, padding_y_, padding_x_, CUDNN_POOLING_MAX);
- do_max_pooling = true;
- }
-
- void pooling::
- setup_avg_pooling(
- int window_height_,
- int window_width_,
- int stride_y_,
- int stride_x_,
- int padding_y_,
- int padding_x_
- )
- {
- setup(window_height_, window_width_, stride_y_, stride_x_, padding_y_, padding_x_, CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING);
- do_max_pooling = false;
- }
-
- void pooling::
- setup(
- int window_height_,
- int window_width_,
- int stride_y_,
- int stride_x_,
- int padding_y_,
- int padding_x_,
- int pooling_mode
- )
- {
- DLIB_CASSERT (window_height_ > 0 && window_width_ > 0 &&
- stride_y_ > 0 && stride_x_ > 0 ,
- "window_height_: " << window_height_
- << "\t\n window_width_: " << window_width_
- << "\t\n stride_y_: " << stride_y_
- << "\t\n stride_x_: " << stride_x_ );
- DLIB_CASSERT( 0 <= padding_y_ && padding_y_ < window_height_ &&
- 0 <= padding_x_ && padding_x_ < window_width_,
- "window_height_: " << window_height_
- << "\t\n window_width_: " << window_width_
- << "\t\n padding_y_: " << padding_y_
- << "\t\n padding_x_: " << padding_x_ );
-
- if (window_height == window_height_ &&
- window_width == window_width_ &&
- stride_y == stride_y_ &&
- stride_x == stride_x_ &&
- padding_y == padding_y_ &&
- padding_x == padding_x_
- )
- {
- return;
- }
-
- clear();
- try
- {
- window_height = window_height_;
- window_width = window_width_;
- stride_x = stride_x_;
- stride_y = stride_y_;
- padding_y = padding_y_;
- padding_x = padding_x_;
- cudnnPoolingDescriptor_t poolingDesc;
- CHECK_CUDNN(cudnnCreatePoolingDescriptor(&poolingDesc));
- handle = poolingDesc;
-
- CHECK_CUDNN(cudnnSetPooling2dDescriptor(poolingDesc,
- (cudnnPoolingMode_t)pooling_mode,
- CUDNN_PROPAGATE_NAN,
- window_height,
- window_width,
- padding_y,
- padding_x,
- stride_y,
- stride_x));
- }
- catch(...)
- {
- clear();
- throw;
- }
- }
-
- void pooling::
- operator() (
- resizable_tensor& dest,
- const tensor& src
- )
- {
- DLIB_CASSERT(window_width <= src.nc() + 2*padding_x,
- "Pooling windows must be small enough to fit into the padded image."
- << "\n\t window_width: " << window_width
- << "\n\t src.nc(): " << src.nc()
- << "\n\t padding_x: " << padding_x
- );
- DLIB_CASSERT(window_height <= src.nr() + 2*padding_y,
- "Pooling windows must be small enough to fit into the padded image."
- << "\n\t window_height: " << window_height
- << "\n\t src.nr(): " << src.nr()
- << "\n\t padding_y: " << padding_y
- );
- const float alpha = 1;
- const float beta = 0;
- int outN;
- int outC;
- int outH;
- int outW;
- CHECK_CUDNN(cudnnGetPooling2dForwardOutputDim((const cudnnPoolingDescriptor_t)handle,
- descriptor(src),
- &outN,
- &outC,
- &outH,
- &outW));
-
-
- dest.set_size(outN,outC,outH,outW);
-
- DLIB_CASSERT(dest.num_samples() == src.num_samples());
- DLIB_CASSERT(dest.k() == src.k());
- DLIB_CASSERT(dest.nr() == 1 + (src.nr() + 2*padding_y - window_height)/stride_y,
- "\n stride_y: " << stride_y <<
- "\n padding_y: " << padding_y <<
- "\n window_height: " << window_height <<
- "\n src.nr(): " << src.nr() <<
- "\n dest.nr(): " << dest.nr() <<
- "\n src.nr()/stride_y: " << src.nr()/stride_y);
- DLIB_CASSERT(dest.nc() == 1 + (src.nc() + 2*padding_x - window_width)/stride_x,
- "\n stride_x: " << stride_x <<
- "\n padding_x: " << padding_x <<
- "\n window_width: " << window_width <<
- "\n src.nc(): " << src.nc() <<
- "\n dest.nc(): " << dest.nc() <<
- "\n src.nc()/stride_x: " << src.nc()/stride_x);
-
- CHECK_CUDNN(cudnnPoolingForward(context(),
- (const cudnnPoolingDescriptor_t)handle,
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
-
- void pooling::get_gradient(
- const tensor& gradient_input,
- const tensor& dest,
- const tensor& src,
- tensor& grad
- )
- {
- DLIB_CASSERT(have_same_dimensions(gradient_input,dest));
- DLIB_CASSERT(have_same_dimensions(src,grad));
-
- const float alpha = 1;
- const float beta = 1;
- CHECK_CUDNN(cudnnPoolingBackward(context(),
- (const cudnnPoolingDescriptor_t)handle,
- &alpha,
- descriptor(dest),
- dest.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- descriptor(src),
- src.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- void softmax (
- tensor& dest,
- const tensor& src
- )
- {
- DLIB_CASSERT(have_same_dimensions(dest,src));
- if (src.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = 0;
-
- CHECK_CUDNN(cudnnSoftmaxForward(context(),
- CUDNN_SOFTMAX_ACCURATE,
- CUDNN_SOFTMAX_MODE_CHANNEL,
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
-
-
- void softmax_gradient (
- tensor& grad,
- const tensor& dest,
- const tensor& gradient_input
- )
- {
- DLIB_CASSERT(
- have_same_dimensions(dest,gradient_input) == true &&
- have_same_dimensions(dest,grad) == true );
- if (dest.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = is_same_object(grad,gradient_input) ? 0 : 1;
- CHECK_CUDNN(cudnnSoftmaxBackward(context(),
- CUDNN_SOFTMAX_ACCURATE,
- CUDNN_SOFTMAX_MODE_CHANNEL,
- &alpha,
- descriptor(dest),
- dest.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
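// (Illustrative sketch; not part of the deleted file.) Choosing beta from
// is_same_object() lets one routine cover both call patterns:
//
//     softmax_gradient(g, dest, g);    // in-place: g is overwritten (beta = 0)
//     softmax_gradient(g, dest, gi);   // distinct inputs: g accumulates (beta = 1)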
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- void softmax_all (
- tensor& dest,
- const tensor& src
- )
- {
- DLIB_CASSERT(have_same_dimensions(dest,src));
- if (src.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = 0;
-
- CHECK_CUDNN(cudnnSoftmaxForward(context(),
- CUDNN_SOFTMAX_ACCURATE,
- CUDNN_SOFTMAX_MODE_INSTANCE,
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
-
-
- void softmax_all_gradient (
- tensor& grad,
- const tensor& dest,
- const tensor& gradient_input
- )
- {
- DLIB_CASSERT(
- have_same_dimensions(dest,gradient_input) == true &&
- have_same_dimensions(dest,grad) == true );
- if (dest.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = is_same_object(grad,gradient_input) ? 0 : 1;
- CHECK_CUDNN(cudnnSoftmaxBackward(context(),
- CUDNN_SOFTMAX_ACCURATE,
- CUDNN_SOFTMAX_MODE_INSTANCE,
- &alpha,
- descriptor(dest),
- dest.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- void sigmoid (
- tensor& dest,
- const tensor& src
- )
- {
- DLIB_CASSERT(have_same_dimensions(dest,src));
- if (src.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = 0;
- CHECK_CUDNN(cudnnActivationForward(context(),
- sigmoid_activation_descriptor(),
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
-
- void sigmoid_gradient (
- tensor& grad,
- const tensor& dest,
- const tensor& gradient_input
- )
- {
- DLIB_CASSERT(
- have_same_dimensions(dest,gradient_input) == true &&
- have_same_dimensions(dest,grad) == true );
- if (dest.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = is_same_object(grad,gradient_input) ? 0 : 1;
- CHECK_CUDNN(cudnnActivationBackward(context(),
- sigmoid_activation_descriptor(),
- &alpha,
- descriptor(dest),
- dest.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- descriptor(dest),
- dest.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
-
- // ------------------------------------------------------------------------------------
-
- void relu (
- tensor& dest,
- const tensor& src
- )
- {
- DLIB_CASSERT(have_same_dimensions(dest,src));
- if (src.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = 0;
- CHECK_CUDNN(cudnnActivationForward(context(),
- relu_activation_descriptor(),
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
-
- void relu_gradient (
- tensor& grad,
- const tensor& dest,
- const tensor& gradient_input
- )
- {
- DLIB_CASSERT(
- have_same_dimensions(dest,gradient_input) == true &&
- have_same_dimensions(dest,grad) == true );
- if (dest.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = is_same_object(grad,gradient_input) ? 0 : 1;
- CHECK_CUDNN(cudnnActivationBackward(context(),
- relu_activation_descriptor(),
- &alpha,
- descriptor(dest),
- dest.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- descriptor(dest),
- dest.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
-
- // ------------------------------------------------------------------------------------
-
- void tanh (
- tensor& dest,
- const tensor& src
- )
- {
- DLIB_CASSERT(have_same_dimensions(dest,src));
- if (src.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = 0;
- CHECK_CUDNN(cudnnActivationForward(context(),
- tanh_activation_descriptor(),
- &alpha,
- descriptor(src),
- src.device(),
- &beta,
- descriptor(dest),
- dest.device()));
- }
-
- void tanh_gradient (
- tensor& grad,
- const tensor& dest,
- const tensor& gradient_input
- )
- {
- DLIB_CASSERT(
- have_same_dimensions(dest,gradient_input) == true &&
- have_same_dimensions(dest,grad) == true);
- if (dest.size() == 0)
- return;
-
- const float alpha = 1;
- const float beta = is_same_object(grad,gradient_input) ? 0 : 1;
- CHECK_CUDNN(cudnnActivationBackward(context(),
- tanh_activation_descriptor(),
- &alpha,
- descriptor(dest),
- dest.device(),
- descriptor(gradient_input),
- gradient_input.device(),
- descriptor(dest),
- dest.device(),
- &beta,
- descriptor(grad),
- grad.device()));
- }
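// (Note; not part of the deleted file.) cudnnActivationBackward() nominally
// takes both the layer input x and output y, but for these activations the
// derivative is a function of the output alone:
//
//     sigmoid: dy/dx = y*(1 - y)
//     tanh:    dy/dx = 1 - y*y
//     relu:    dy/dx = (y > 0) ? 1 : 0
//
// which is why all three gradient routines above pass descriptor(dest) and
// dest.device() for the x arguments as well.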
-
- // ------------------------------------------------------------------------------------
- }
-}
-
-#endif // DLIB_USE_CUDA
-
-#endif // DLIB_DNN_CuDNN_CPP_
-
-