diff options
Diffstat (limited to 'ml/dlib/dlib/dnn/layers_abstract.h')
-rw-r--r-- | ml/dlib/dlib/dnn/layers_abstract.h | 2631 |
1 files changed, 0 insertions, 2631 deletions
diff --git a/ml/dlib/dlib/dnn/layers_abstract.h b/ml/dlib/dlib/dnn/layers_abstract.h deleted file mode 100644 index f07025ff8..000000000 --- a/ml/dlib/dlib/dnn/layers_abstract.h +++ /dev/null @@ -1,2631 +0,0 @@ -// Copyright (C) 2015 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_DNn_LAYERS_ABSTRACT_H_ -#ifdef DLIB_DNn_LAYERS_ABSTRACT_H_ - -#include "tensor_abstract.h" -#include "core_abstract.h" - - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - class SUBNET - { - /*! - WHAT THIS OBJECT REPRESENTS - This object represents a deep neural network. In particular, it is - the simplified interface through which layer objects interact with their - subnetworks. A layer's two important tasks are to (1) take outputs from its - subnetwork and forward propagate them through itself and (2) to backwards - propagate an error gradient through itself and onto its subnetwork. - The idea of a subnetwork is illustrated in the following diagram: - - +---------------------------------------------------------+ - | loss <-- layer1 <-- layer2 <-- ... <-- layern <-- input | - +---------------------------------------------------------+ - ^ ^ - \__ subnetwork for layer1 __/ - - Therefore, by "subnetwork" we mean the part of the network closer to the - input. - - Note that there is no dlib::SUBNET type. It is shown here purely to - document the interface layer objects expect to see when they interact - with a network. - !*/ - - public: - // You aren't allowed to copy subnetworks from inside a layer. - SUBNET(const SUBNET&) = delete; - SUBNET& operator=(const SUBNET&) = delete; - - const tensor& get_output( - ) const; - /*! - ensures - - returns the output of this subnetwork. This is the data that the next - layer in the network will take as input. 
- - have_same_dimensions(#get_gradient_input(), get_output()) == true - !*/ - - tensor& get_gradient_input( - ); - /*! - ensures - - returns the error gradient for this subnetwork. That is, this is the - error gradient that this network will use to update itself. Therefore, - when performing back propagation, layers that sit on top of this - subnetwork write their back propagated error gradients into - get_gradient_input(). Or to put it another way, during back propagation, - layers take the contents of their get_gradient_input() and back propagate - it through themselves and store the results into their subnetwork's - get_gradient_input(). - !*/ - - const NEXT_SUBNET& subnet( - ) const; - /*! - ensures - - returns the subnetwork of *this network. With respect to the diagram - above, if *this was layer1 then subnet() would return the network that - begins with layer2. - !*/ - - NEXT_SUBNET& subnet( - ); - /*! - ensures - - returns the subnetwork of *this network. With respect to the diagram - above, if *this was layer1 then subnet() would return the network that - begins with layer2. - !*/ - - const layer_details_type& layer_details( - ) const; - /*! - ensures - - returns the layer_details_type instance that defines the behavior of the - layer at the top of this network. I.e. returns the layer details that - defines the behavior of the layer nearest to the network output rather - than the input layer. For computational layers, this is the object - implementing the EXAMPLE_COMPUTATIONAL_LAYER_ interface that defines the - layer's behavior. - !*/ - - unsigned int sample_expansion_factor ( - ) const; - /*! - ensures - - When to_tensor() is invoked on this network's input layer it converts N - input objects into M samples, all stored inside a resizable_tensor. It - is always the case that M is some integer multiple of N. - sample_expansion_factor() returns the value of this multiplier. To be - very specific, it is always true that M==I*N where I is some integer. 
- This integer I is what is returned by sample_expansion_factor(). - - It should be noted that computational layers likely do not care about the - sample expansion factor. It is only really of concern inside a loss - layer where you need to know its value so that tensor samples can be - matched against truth objects. Moreover, in most cases the sample - expansion factor is 1. - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - class EXAMPLE_COMPUTATIONAL_LAYER_ - { - /*! - WHAT THIS OBJECT REPRESENTS - Each computational layer in a deep neural network can be thought of as a - function, f(data,parameters), that takes in a data tensor, some parameters, - and produces an output tensor. You create an entire deep network by - composing these functions. Importantly, you are able to use a wide range - of different functions to accommodate the task you are trying to - accomplish. Therefore, dlib includes a number of common layer types but if - you want to define your own then you simply implement a class with the same - interface as EXAMPLE_COMPUTATIONAL_LAYER_. - - Note that there is no dlib::EXAMPLE_COMPUTATIONAL_LAYER_ type. It is shown - here purely to document the interface that a layer object must implement. - - The central work of defining a layer is implementing the forward and backward - methods. When you do this you have four options: - - Implement the forward() and backward() methods according to the - specification shown below. Do not implement forward_inplace() and - backward_inplace(). - - Implement the forward() and backward() methods according to the - specification shown below, except exclude the computed_output - parameter from backward(). Doing this will allow dlib to make some - layers execute in-place and therefore run a little faster and use - less memory. Do not implement forward_inplace() and - backward_inplace(). 
- - Implement the forward_inplace() and backward_inplace() methods - according to the specification shown below. Do not implement - forward() and backward(). These in-place methods allow some types of - layers to be implemented more efficiently. - - Implement the forward_inplace() and backward_inplace() methods - according to the specification shown below, except exclude the - computed_output parameter from backward_inplace(). Doing this will - allow dlib to make some layers execute in-place and therefore run a - little faster and use less memory. Do not implement forward() and - backward(). - - - It should also be noted that layers may define additional layer specific - fields and the solvers can use these fields as they see fit. For example, - some layers define get_learning_rate_multiplier() and - get_weight_decay_multiplier() methods. The solvers that come with dlib - look at these methods, if they exist, and adjust the learning rate or - weight decay for that layer according to the multiplier. Therefore, you - can add these methods to your layer types if you want, or even define new - fields and new solvers that use those fields in some way. - !*/ - - public: - - EXAMPLE_COMPUTATIONAL_LAYER_( - ); - /*! - ensures - - Default constructs this object. This function is not required to do - anything in particular but it must exist, that is, it is required that - layer objects be default constructible. - !*/ - - EXAMPLE_COMPUTATIONAL_LAYER_ ( - const EXAMPLE_COMPUTATIONAL_LAYER_& item - ); - /*! - ensures - - EXAMPLE_COMPUTATIONAL_LAYER_ objects are copy constructible - !*/ - - EXAMPLE_COMPUTATIONAL_LAYER_( - const some_other_layer_type& item - ); - /*! - ensures - - Constructs this object from item. This form of constructor is optional - but it allows you to provide a conversion from one layer type to another. 
- For example, the following code is valid only if my_layer2 can be - constructed from my_layer1: - relu<fc<my_layer1<fc<input<matrix<float>>>>>> my_dnn1; - relu<fc<my_layer2<fc<input<matrix<float>>>>>> my_dnn2(my_dnn1); - This kind of pattern is useful if you want to use one type of layer - during training but a different type of layer during testing since it - allows you to easily convert between related deep neural network types. - - Additionally, if you provide a constructor to build a layer from another - layer type you should also write your layer's deserialize() routine such - that it can read that other layer's serialized data in addition to your - own serialized data. - !*/ - - template <typename SUBNET> - void setup ( - const SUBNET& sub - ); - /*! - requires - - SUBNET implements the SUBNET interface defined at the top of this file. - ensures - - performs any necessary initial memory allocations and/or sets parameters - to their initial values prior to learning. Therefore, calling setup - destroys any previously learned parameters. Also, typically setup() - would look at the dimensions of the outputs of sub and configure the - number of parameters in *this accordingly. - !*/ - - template <typename SUBNET> - void forward( - const SUBNET& sub, - resizable_tensor& data_output - ); - /*! - requires - - SUBNET implements the SUBNET interface defined at the top of this file. - - setup() has been called. - ensures - - Runs the output of the subnetwork through this layer and stores the - results into #data_output. In particular, forward() can use any of the - outputs in sub (e.g. sub.get_output(), sub.subnet().get_output(), etc.) - to compute whatever it wants. - !*/ - - template <typename SUBNET> - void backward( - const tensor& computed_output, // this parameter is optional - const tensor& gradient_input, - SUBNET& sub, - tensor& params_grad - ); - /*! - requires - - SUBNET implements the SUBNET interface defined at the top of this file. 
- - setup() has been called. - - computed_output is the tensor resulting from calling forward(sub,computed_output). - Moreover, this was the most recent call to forward(). This means that - forward() is allowed to cache intermediate results so they can be used - during the backward computation. - - have_same_dimensions(gradient_input, computed_output) == true - - have_same_dimensions(sub.get_gradient_input(), sub.get_output()) == true - - have_same_dimensions(params_grad, get_layer_params()) == true - ensures - - This function outputs the gradients of this layer with respect to the - input data from sub and also with respect to this layer's parameters. - These gradients are stored into #sub and #params_grad, respectively. To be - precise, the gradients are taken of a function f(sub,get_layer_params()) - which is defined thusly: - - Recalling that computed_output is a function of both sub and get_layer_params(), - since it is the result of calling forward(sub,computed_output): - let f(sub,get_layer_params()) == dot(computed_output, gradient_input) - Then we define the following gradient vectors: - - PARAMETER_GRADIENT == gradient of f(sub,get_layer_params()) with - respect to get_layer_params(). - - for all valid I: - - DATA_GRADIENT_I == gradient of f(sub,get_layer_params()) with - respect to layer<I>(sub).get_output() (recall that forward() can - draw inputs from the immediate sub layer, sub.subnet(), or - any earlier layer. So you must consider the gradients with - respect to all inputs drawn from sub) - Finally, backward() outputs these gradients by performing: - - params_grad = PARAMETER_GRADIENT - - for all valid I: - - layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I - !*/ - - void forward_inplace( - const tensor& data_input, - tensor& data_output - ); - /*! - requires - - have_same_dimensions(data_input,data_output) == true - - setup() has been called. - ensures - - Runs the data_input tensor through this layer and stores the output into - #data_output. 
- - This function supports in-place operation, i.e. having - is_same_object(data_input, data_output)==true - !*/ - - void backward_inplace( - const tensor& computed_output, // this parameter is optional - const tensor& gradient_input, - tensor& data_grad, - tensor& params_grad - ); - /*! - requires - - setup() has been called. - - computed_output is the tensor resulting from the most recent call to - forward_inplace(). This means that forward_inplace() is allowed to cache - intermediate results so they can be used during the backward computation. - - have_same_dimensions(gradient_input, data_grad) == true - - have_same_dimensions(gradient_input, computed_output) == true - - have_same_dimensions(params_grad, get_layer_params()) == true - ensures - - This function supports in-place operation, i.e. having - is_same_object(gradient_input, data_grad)==true - - This function outputs the gradients of this layer with respect to the - input data from a sublayer and also with respect to this layer's parameters. - These gradients are stored into #data_grad and #params_grad, respectively. To be - precise, the gradients are taken of a function f(data_input,get_layer_params()) - which is defined thusly: - - Recalling that computed_output is a function of both the input to - forward_inplace() and get_layer_params(), since it is the result of - calling forward_inplace(data_input,computed_output): - let f(data_input,get_layer_params()) == dot(computed_output, gradient_input) - Then we define the following gradient vectors: - - PARAMETER_GRADIENT == gradient of f(data_input,get_layer_params()) with - respect to get_layer_params(). - - DATA_GRADIENT == gradient of f(data_input,get_layer_params()) with respect - to data_input. 
- Finally, backward_inplace() outputs these gradients by performing: - - params_grad = PARAMETER_GRADIENT - - if (is_same_object(gradient_input, data_grad)) then - - data_grad = DATA_GRADIENT - - else - - data_grad += DATA_GRADIENT - !*/ - - const tensor& get_layer_params( - ) const; - /*! - ensures - - returns the parameters that define the behavior of forward(). - !*/ - - tensor& get_layer_params( - ); - /*! - ensures - - returns the parameters that define the behavior of forward(). - !*/ - - - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - /*! - These two functions are optional. If provided, they should map between - (column,row) coordinates in input and output tensors of forward(). Providing - these functions allows you to use global utility functions like - input_tensor_to_output_tensor(). - !*/ - - void clean ( - ); - /*! - Implementing this function is optional. If you don't need it then you don't - have to provide a clean(). But if you do provide it then it must behave as - follows: - - ensures - - calling clean() Causes this object to forget about everything except its - parameters. This is useful if your layer caches information between - forward and backward passes and you want to clean out that cache - information before saving the network to disk. - !*/ - - }; - - std::ostream& operator<<(std::ostream& out, const EXAMPLE_COMPUTATIONAL_LAYER_& item); - /*! - print a string describing this layer. - !*/ - - void to_xml(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out); - /*! - This function is optional, but required if you want to print your networks with - net_to_xml(). Therefore, to_xml() prints a layer as XML. - !*/ - - void serialize(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out); - void deserialize(EXAMPLE_COMPUTATIONAL_LAYER_& item, std::istream& in); - /*! 
- provides serialization support - !*/ - - // For each layer you define, always define an add_layer template so that layers can be - // easily composed. Moreover, the convention is that the layer class ends with an _ - // while the add_layer template has the same name but without the trailing _. - template <typename SUBNET> - using EXAMPLE_COMPUTATIONAL_LAYER = add_layer<EXAMPLE_COMPUTATIONAL_LAYER_, SUBNET>; - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - enum fc_bias_mode - { - FC_HAS_BIAS = 0, - FC_NO_BIAS = 1 - }; - - struct num_fc_outputs - { - num_fc_outputs(unsigned long n) : num_outputs(n) {} - unsigned long num_outputs; - }; - - template < - unsigned long num_outputs, - fc_bias_mode bias_mode - > - class fc_ - { - /*! - REQUIREMENTS ON num_outputs - num_outputs > 0 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a fully connected layer that - takes an input tensor and multiplies it by a weight matrix and outputs the - results. - - The dimensions of the tensors output by this layer are as follows (letting - IN be the input tensor and OUT the output tensor): - - OUT.num_samples() == IN.num_samples() - - OUT.k() == get_num_outputs() - - OUT.nr() == 1 - - OUT.nc() == 1 - !*/ - - public: - - fc_( - ); - /*! - ensures - - #get_num_outputs() == num_outputs - - #get_bias_mode() == bias_mode - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 1 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 0 - !*/ - - fc_( - num_fc_outputs o - ); - /*! 
- ensures - - #get_num_outputs() == o.num_outputs - - #get_bias_mode() == bias_mode - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 1 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 0 - !*/ - - unsigned long get_num_outputs ( - ) const; - /*! - ensures - - This layer outputs column vectors that contain get_num_outputs() - elements. That is, the output tensor T from forward() will be such that: - - T.num_samples() == however many samples were given to forward(). - - T.k() == get_num_outputs() - - The rest of the dimensions of T will be 1. - !*/ - - void set_num_outputs( - long num - ); - /*! - requires - - num > 0 - - get_layer_params().size() == 0 || get_num_outputs() == num - (i.e. You can't change the number of outputs in fc_ if the parameter - tensor has already been allocated.) - ensures - - #get_num_outputs() == num - !*/ - - fc_bias_mode get_bias_mode ( - ) const; - /*! - ensures - - returns the bias mode which determines if this layer includes bias terms. - That is, if the bias mode is FC_HAS_BIAS then a different constant scalar - is added to each of the outputs of this layer. - !*/ - - double get_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its parameters be - multiplied by get_learning_rate_multiplier(). - !*/ - - double get_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its parameters be - multiplied by get_weight_decay_multiplier(). - !*/ - - void set_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_learning_rate_multiplier() == val - !*/ - - void set_weight_decay_multiplier( - double val - ); - /*! 
- requires - - val >= 0 - ensures - - #get_weight_decay_multiplier() == val - !*/ - - double get_bias_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its bias parameters be - multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier(). - !*/ - - double get_bias_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its bias parameters be - multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier(). - !*/ - - void set_bias_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_learning_rate_multiplier() == val - !*/ - - void set_bias_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_weight_decay_multiplier() == val - !*/ - - alias_tensor_const_instance get_weights( - ) const; - /*! - ensures - - returns an alias of get_layer_params(), containing the weights matrix of - the fully connected layer. - - #get_weights().num_samples() is the number of elements in input sample, - i.e. sublayer's output's k * nc * nr. - - #get_weights().k() == #get_num_outputs() - - if get_bias_mode() == FC_HAS_BIAS: - - #get_layer_params().size() == (#get_weights().size() + #get_biases().size()) - - else: - - #get_layer_params().size() == #get_weights().size() - !*/ - - alias_tensor_instance get_weights( - ); - /*! - ensures - - returns an alias of get_layer_params(), containing the weights matrix of - the fully connected layer. - - #get_weights().num_samples() is the number of elements in input sample, - i.e. sublayer's output's k * nc * nr. 
- - #get_weights().k() == #get_num_outputs() - - if get_bias_mode() == FC_HAS_BIAS: - - #get_layer_params().size() == (#get_weights().size() + #get_biases().size()) - - else: - - #get_layer_params().size() == #get_weights().size() - !*/ - - alias_tensor_const_instance get_biases( - ) const; - /*! - requires - - #get_bias_mode() == FC_HAS_BIAS - ensures - - returns an alias of get_layer_params(), containing the bias vector of - the fully connected layer. - - #get_biases().num_samples() == 1 - - #get_biases().k() == #get_num_outputs() - - #get_layer_params().size() == (#get_weights().size() + #get_biases().size()) - !*/ - - alias_tensor_instance get_biases( - ); - /*! - requires - - #get_bias_mode() == FC_HAS_BIAS - ensures - - returns an alias of get_layer_params(), containing the bias vector of - the fully connected layer. - - #get_biases().num_samples() == 1 - - #get_biases().k() == #get_num_outputs() - - #get_layer_params().size() == (#get_weights().size() + #get_biases().size()) - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. 
- !*/ - - }; - - template < - unsigned long num_outputs, - typename SUBNET - > - using fc = add_layer<fc_<num_outputs,FC_HAS_BIAS>, SUBNET>; - - template < - unsigned long num_outputs, - typename SUBNET - > - using fc_no_bias = add_layer<fc_<num_outputs,FC_NO_BIAS>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - struct num_con_outputs - { - num_con_outputs(unsigned long n) : num_outputs(n) {} - unsigned long num_outputs; - }; - - template < - long _num_filters, - long _nr, - long _nc, - int _stride_y, - int _stride_x, - int _padding_y = _stride_y!=1? 0 : _nr/2, - int _padding_x = _stride_x!=1? 0 : _nc/2 - > - class con_ - { - /*! - REQUIREMENTS ON TEMPLATE ARGUMENTS - - _num_filters > 0 - - _nr >= 0 - - _nc >= 0 - - _stride_y > 0 - - _stride_x > 0 - - _padding_y >= 0 - - _padding_x >= 0 - - Also, we require that: - - if (_nr == 0) then - - _padding_y == 0 - - else - - _padding_y < _nr - - if (_nc == 0) then - - _padding_x == 0 - - else - - _padding_x < _nc - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a convolution layer that takes an - input tensor (nominally representing an image) and convolves it with a set - of filters and then outputs the results. - - The dimensions of the tensors output by this layer are as follows (letting - IN be the input tensor and OUT the output tensor): - - OUT.num_samples() == IN.num_samples() - - OUT.k() == num_filters() - - OUT.nr() == 1+(IN.nr() + 2*padding_y() - nr())/stride_y() - - OUT.nc() == 1+(IN.nc() + 2*padding_x() - nc())/stride_x() - - Note also that setting _nr or _nc to 0 has a special meaning of "set the - filter size equal to the input image size". Specifically, it means: - - if (_nr == 0) then - - nr() == IN.nr() - - OUT.nr() == 1 - - if (_nc == 0) then - - nc() == IN.nc() - - OUT.nc() == 1 - !*/ - - public: - con_( - ); - /*! 
- ensures - - #num_filters() == _num_filters - - #nr() == _nr - - #nc() == _nc - - #stride_y() == _stride_y - - #stride_x() == _stride_x - - #padding_y() == _padding_y - - #padding_x() == _padding_x - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 1 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 0 - !*/ - - con_( - num_con_outputs o - ); - /*! - ensures - - #num_filters() == o.num_outputs - - #nr() == _nr - - #nc() == _nc - - #stride_y() == _stride_y - - #stride_x() == _stride_x - - #padding_y() == _padding_y - - #padding_x() == _padding_x - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 1 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 0 - !*/ - - long num_filters( - ) const; - /*! - ensures - - returns the number of filters contained in this layer. The k dimension - of the output tensors produced by this layer will be equal to the number - of filters. - !*/ - - void set_num_filters( - long num - ); - /*! - requires - - num > 0 - - get_layer_params().size() == 0 || num_filters() == num - (i.e. You can't change the number of filters in con_ if the parameter - tensor has already been allocated.) - ensures - - #num_filters() == num - !*/ - - long nr( - ) const; - /*! - ensures - - returns the number of rows in the filters in this layer. Note that if - nr()==0 then it means the size of the filter is not yet assigned, but - once setup() is called nr() will be set to the input tensor's nr(). - Therefore, nr()==0 has the special interpretation of "be the same size as - the input tensor". - !*/ - - long nc( - ) const; - /*! - ensures - - returns the number of columns in the filters in this layer. Note that if - nc()==0 then it means the size of the filter is not yet assigned, but - once setup() is called nc() will be set to the input tensor's nc(). 
- Therefore, nc()==0 has the special interpretation of "be the same size as - the input tensor". - !*/ - - long stride_y( - ) const; - /*! - ensures - - returns the vertical stride used when convolving the filters over an - image. That is, each filter will be moved stride_y() pixels down at a - time when it moves over the image. - !*/ - - long stride_x( - ) const; - /*! - ensures - - returns the horizontal stride used when convolving the filters over an - image. That is, each filter will be moved stride_x() pixels right at a - time when it moves over the image. - !*/ - - long padding_y( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the top and bottom - sides of the image. - !*/ - - long padding_x( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the left and right - sides of the image. - !*/ - - double get_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its parameters be - multiplied by get_learning_rate_multiplier(). - !*/ - - double get_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its parameters be - multiplied by get_weight_decay_multiplier(). - !*/ - - void set_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_learning_rate_multiplier() == val - !*/ - - void set_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_weight_decay_multiplier() == val - !*/ - - double get_bias_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. 
The interpretation is that this object is - requesting that the learning rate used to optimize its bias parameters be - multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier(). - !*/ - - double get_bias_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its bias parameters be - multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier(). - !*/ - - void set_bias_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_learning_rate_multiplier() == val - !*/ - - void set_bias_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_weight_decay_multiplier() == val - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - - }; - - template < - long num_filters, - long nr, - long nc, - int stride_y, - int stride_x, - typename SUBNET - > - using con = add_layer<con_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - template < - long _num_filters, - long _nr, - long _nc, - int _stride_y, - int _stride_x, - int _padding_y = _stride_y!=1? 0 : _nr/2, - int _padding_x = _stride_x!=1? 0 : _nc/2 - > - class cont_ - { - /*! - REQUIREMENTS ON TEMPLATE ARGUMENTS - _num_filters, _nr, _nc, _stride_y, and _stride_x must be > 0. 
- Also, we require that: - - 0 <= _padding_y && _padding_y < _nr - - 0 <= _padding_x && _padding_x < _nc - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a transposed convolution layer - that takes an input tensor and transpose convolves (sometimes called - "deconvolution") it with a set of filters and then outputs the results. - - This is essentially a convolutional layer that allows fractional strides. - Therefore, you can make output tensors that are larger than the input - tensors using this layer type. - - - The dimensions of the tensors output by this layer are as follows (letting - IN be the input tensor and OUT the output tensor): - - OUT.num_samples() == IN.num_samples() - - OUT.k() == num_filters() - - OUT.nr() == stride_y()*(IN.nr()-1) + nr() - 2*padding_y() - - OUT.nc() == stride_x()*(IN.nc()-1) + nc() - 2*padding_x() - !*/ - - public: - cont_( - ); - /*! - ensures - - #num_filters() == _num_filters - - #nr() == _nr - - #nc() == _nc - - #stride_y() == _stride_y - - #stride_x() == _stride_x - - #padding_y() == _padding_y - - #padding_x() == _padding_x - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 1 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 0 - !*/ - - cont_( - num_con_outputs o - ); - /*! - ensures - - #num_filters() == o.num_outputs - - #nr() == _nr - - #nc() == _nc - - #stride_y() == _stride_y - - #stride_x() == _stride_x - - #padding_y() == _padding_y - - #padding_x() == _padding_x - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 1 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 0 - !*/ - - long num_filters( - ) const; - /*! - ensures - - returns the number of filters contained in this layer. The k dimension - of the output tensors produced by this layer will be equal to the number - of filters. 
- !*/ - - void set_num_filters( - long num - ); - /*! - requires - - num > 0 - - get_layer_params().size() == 0 || num_filters() == num - (i.e. You can't change the number of filters in cont_ if the parameter - tensor has already been allocated.) - ensures - - #num_filters() == num - !*/ - - long nr( - ) const; - /*! - ensures - - returns the number of rows in the filters in this layer. - !*/ - - long nc( - ) const; - /*! - ensures - - returns the number of columns in the filters in this layer. - !*/ - - long stride_y( - ) const; - /*! - ensures - - returns the vertical stride used when convolving the filters over an - image. That is, each filter will be moved 1.0/stride_y() pixels down at - a time when it moves over the image. - !*/ - - long stride_x( - ) const; - /*! - ensures - - returns the horizontal stride used when convolving the filters over an - image. That is, each filter will be moved 1.0/stride_x() pixels right at - a time when it moves over the image. - !*/ - - long padding_y( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the top and bottom - sides of the image. - !*/ - - long padding_x( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the left and right - sides of the image. - !*/ - - double get_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its parameters be - multiplied by get_learning_rate_multiplier(). - !*/ - - double get_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its parameters be - multiplied by get_weight_decay_multiplier(). - !*/ - - void set_learning_rate_multiplier( - double val - ); - /*! 
- requires - - val >= 0 - ensures - - #get_learning_rate_multiplier() == val - !*/ - - void set_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_weight_decay_multiplier() == val - !*/ - - double get_bias_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its bias parameters be - multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier(). - !*/ - - double get_bias_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its bias parameters be - multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier(). - !*/ - - void set_bias_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_learning_rate_multiplier() == val - !*/ - - void set_bias_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_weight_decay_multiplier() == val - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. 
- !*/ - - }; - - template < - long num_filters, - long nr, - long nc, - int stride_y, - int stride_x, - typename SUBNET - > - using cont = add_layer<cont_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - template < - int scale_y, - int scale_x - > - class upsample_ - { - /*! - REQUIREMENTS ON TEMPLATE ARGUMENTS - All of them must be >= 1. - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it allows you to upsample a layer using - bilinear interpolation. To be very specific, it upsamples each of the - channels in an input tensor. Therefore, if IN is the input tensor to this - layer and OUT the output tensor, then we will have: - - OUT.num_samples() == IN.num_samples() - - OUT.k() == IN.k() - - OUT.nr() == IN.nr()*scale_y - - OUT.nc() == IN.nc()*scale_x - - for all valid i,k: image_plane(OUT,i,k) is a copy of - image_plane(IN,i,k) that has been bilinearly interpolated to fit into - the shape of image_plane(OUT,i,k). - !*/ - public: - - upsample_( - ); - /*! - ensures - - This object has no state, so the constructor does nothing, aside from - providing default constructability. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. 
- !*/ - }; - - template < - int scale, - typename SUBNET - > - using upsample = add_layer<upsample_<scale,scale>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class dropout_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a dropout layer. Therefore, it - passes its inputs through the stochastic function f(x) which outputs either - 0 or x. The probability of 0 being output is given by the drop_rate - argument to this object's constructor. - - Note that, after you finish training a network with dropout, it is a good - idea to replace each dropout_ layer with a multiply_ layer because the - multiply_ layer is faster and deterministic. - !*/ - - public: - - explicit dropout_( - float drop_rate = 0.5 - ); - /*! - requires - - 0 <= drop_rate <= 1 - ensures - - #get_drop_rate() == drop_rate - !*/ - - float get_drop_rate ( - ) const; - /*! - ensures - - returns the probability that an individual input value to this layer will - be replaced with 0. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - - template <typename SUBNET> - using dropout = add_layer<dropout_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class multiply_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. 
In particular, it defines a basic layer that just - multiplies its input tensor with a constant value and returns the result. - It therefore has no learnable parameters. - !*/ - - public: - explicit multiply_( - float val = 0.5 - ); - /*! - ensures - - #get_multiply_value() == val - !*/ - - multiply_ ( - const dropout_& item - ); - /*! - ensures - - #get_multiply_value() == 1-item.get_drop_rate() - (i.e. We construct the multiply_ layer so that it is essentially a - deterministic version of the given dropout_ layer) - !*/ - - float get_multiply_value ( - ) const; - /*! - ensures - - this layer simply multiplies its input tensor by get_multiply_value() and - produces the result as output. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - - template <typename SUBNET> - using multiply = add_layer<multiply_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - enum layer_mode - { - CONV_MODE = 0, // convolutional mode - FC_MODE = 1 // fully connected mode - }; - - const double DEFAULT_BATCH_NORM_EPS = 0.0001; - - template < - layer_mode mode - > - class bn_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. 
In particular, it defines a batch normalization layer that - implements the method described in the paper: - Batch Normalization: Accelerating Deep Network Training by Reducing - Internal Covariate Shift by Sergey Ioffe and Christian Szegedy - - In particular, this layer produces output tensors with the same - dimensionality as the input tensors, except that the mean and variances of - the elements have been standardized to 0 and 1 respectively. - - It should also be noted that when tensors with a num_samples() dimension of - 1 are passed to this layer it doesn't perform batch normalization. - Instead, it runs in "inference mode" where the learned linear normalizing - transformation is used to transform the tensor. - - Finally, after you finish training a batch normalized network, it is a good - idea to replace each bn_ layer with an affine_ layer because the affine_ - layer is faster and will never surprise you by performing batch - normalization on tensors that have a num_samples() dimension > 1. This allows - you to run large mini-batches of samples through your final network without - batch normalization executing at all. - !*/ - - public: - bn_( - ); - /*! - ensures - - #get_mode() == mode - - #get_running_stats_window_size() == 100 - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 0 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 1 - - #get_eps() == tt::DEFAULT_BATCH_NORM_EPS - !*/ - - explicit bn_( - unsigned long window_size, - double eps = tt::DEFAULT_BATCH_NORM_EPS - ); - /*! - requires - - eps > 0 - - window_size > 0 - ensures - - #get_mode() == mode - - #get_running_stats_window_size() == window_size - - #get_learning_rate_multiplier() == 1 - - #get_weight_decay_multiplier() == 0 - - #get_bias_learning_rate_multiplier() == 1 - - #get_bias_weight_decay_multiplier() == 1 - - #get_eps() == eps - !*/ - - layer_mode get_mode( - ) const; - /*! 
- ensures - - returns the mode of this layer, either CONV_MODE or FC_MODE. - If the mode is FC_MODE then the normalization is applied across the - samples in a tensor (i.e. k()*nr()*nc() different things will be - normalized). Otherwise, normalization is applied across everything - except for the k() dimension, resulting in there being only k() - normalization equations that are applied spatially over the tensor. - - Therefore, if you are putting batch normalization after a fully connected - layer you should use FC_MODE. Otherwise, if you are putting batch - normalization after a convolutional layer you should use CONV_MODE. - !*/ - - double get_eps( - ) const; - /*! - ensures - - When doing batch normalization, we are dividing by the standard - deviation. This epsilon value returned by this function is added to the - variance to prevent the division from dividing by zero. - !*/ - - unsigned long get_running_stats_window_size ( - ) const; - /*! - ensures - - Just as recommended in the batch normalization paper, this object keeps a - running average of the mean and standard deviations of the features. - These averages are used during "inference mode" so you can run a single - object through a batch normalized network. They are also what is used to - initialize an affine_ layer that is constructed from a bn_ layer. This - function returns the effective number of recent samples used to compute - the running average. - !*/ - - void set_running_stats_window_size ( - unsigned long new_window_size - ); - /*! - requires - - new_window_size > 0 - ensures - - #get_running_stats_window_size() == new_window_size - !*/ - - double get_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its parameters be - multiplied by get_learning_rate_multiplier(). - !*/ - - double get_weight_decay_multiplier( - ) const; - /*! 
- ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its parameters be - multiplied by get_weight_decay_multiplier(). - !*/ - - void set_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_learning_rate_multiplier() == val - !*/ - - void set_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_weight_decay_multiplier() == val - !*/ - - double get_bias_learning_rate_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the learning rate used to optimize its bias parameters be - multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier(). - !*/ - - double get_bias_weight_decay_multiplier( - ) const; - /*! - ensures - - returns a multiplier number. The interpretation is that this object is - requesting that the weight decay used to optimize its bias parameters be - multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier(). - !*/ - - void set_bias_learning_rate_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_learning_rate_multiplier() == val - !*/ - - void set_bias_weight_decay_multiplier( - double val - ); - /*! - requires - - val >= 0 - ensures - - #get_bias_weight_decay_multiplier() == val - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. 
- !*/ - }; - - template <typename SUBNET> - using bn_con = add_layer<bn_<CONV_MODE>, SUBNET>; - template <typename SUBNET> - using bn_fc = add_layer<bn_<FC_MODE>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - template <typename net_type> - void set_all_bn_running_stats_window_sizes ( - const net_type& net, - unsigned long new_window_size - ); - /*! - requires - - new_window_size > 0 - - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or - add_tag_layer. - ensures - - Sets the get_running_stats_window_size() field of all bn_ layers in net to - new_window_size. - !*/ - -// ---------------------------------------------------------------------------------------- - - class affine_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it applies a simple pointwise linear - transformation to an input tensor. You can think of it as having two - parameter tensors, A and B. If the input tensor is called INPUT then the - output of this layer is: - A*INPUT+B - where all operations are performed element wise and each sample in the - INPUT tensor is processed separately. - - Moreover, this object has two modes that affect the dimensionalities of A - and B and how they are applied to compute A*INPUT+B. If - get_mode()==FC_MODE then A and B each have the same dimensionality as the - input tensor, except their num_samples() dimensions are 1. If - get_mode()==CONV_MODE then A and B have all their dimensions set to 1 - except for k(), which is equal to INPUT.k(). - - In either case, the computation of A*INPUT+B is performed pointwise over all - the elements of INPUT using either: - OUTPUT(n,k,r,c) == A(1,k,r,c)*INPUT(n,k,r,c)+B(1,k,r,c) - or - OUTPUT(n,k,r,c) == A(1,k,1,1)*INPUT(n,k,r,c)+B(1,k,1,1) - as appropriate. 
- - - Finally, note that the parameters of this layer are not learnable and - therefore not modified during network updates. Instead, the layer will - perform the identity transformation unless it is initialized with a bn_ - layer, in which case it will perform whatever transformation the bn_ layer - has learned. - !*/ - - public: - - affine_( - ); - /*! - ensures - - #get_mode() == FC_MODE - !*/ - - affine_( - layer_mode mode - ); - /*! - ensures - - #get_mode() == mode - !*/ - - template < - layer_mode mode - > - affine_( - const bn_<mode>& layer - ); - /*! - ensures - - Constructs affine_ so that it performs the same transformation as the - supplied batch normalization layer. You would want to do this after you - finish training a network with bn_ layers because the affine_ layer will - execute faster. - - #get_mode() == layer.get_mode() - !*/ - - layer_mode get_mode( - ) const; - /*! - ensures - - returns the mode of this layer, either CONV_MODE or FC_MODE. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the - EXAMPLE_COMPUTATIONAL_LAYER_ interface. Also note that get_layer_params() - always returns an empty tensor since there are no learnable parameters in this - object. - !*/ - - }; - - template <typename SUBNET> - using affine = add_layer<affine_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - template < - long _nr, - long _nc, - int _stride_y, - int _stride_x, - int _padding_y = _stride_y!=1? 0 : _nr/2, - int _padding_x = _stride_x!=1? 0 : _nc/2 - > - class max_pool_ - { - /*! 
- REQUIREMENTS ON TEMPLATE ARGUMENTS - - _nr >= 0 - - _nc >= 0 - - _stride_y > 0 - - _stride_x > 0 - - _padding_y >= 0 - - _padding_x >= 0 - - if (_nr != 0) then - - _padding_y < _nr - - else - - _padding_y == 0 - - if (_nc != 0) then - - _padding_x < _nc - - else - - _padding_x == 0 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a max pooling layer that takes an - input tensor and downsamples it. It does this by sliding a window over the - images in an input tensor and outputting, for each channel, the maximum - element within the window. - - If _nr == 0 then it means the filter size covers all the rows in the input - tensor, similarly for the _nc parameter. To be precise, if we call the - input tensor IN and the output tensor OUT, then OUT is defined as follows: - - let FILT_NR == (nr()==0) ? IN.nr() : nr() - - let FILT_NC == (nc()==0) ? IN.nc() : nc() - - OUT.num_samples() == IN.num_samples() - - OUT.k() == IN.k() - - OUT.nr() == 1+(IN.nr() + 2*padding_y() - FILT_NR)/stride_y() - - OUT.nc() == 1+(IN.nc() + 2*padding_x() - FILT_NC)/stride_x() - - for all valid s, k, r, and c: - - image_plane(OUT,s,k)(r,c) == max(subm_clipped(image_plane(IN,s,k), - centered_rect(c*stride_x() + FILT_NC/2 - padding_x(), - r*stride_y() + FILT_NR/2 - padding_y(), - FILT_NC, - FILT_NR))) - !*/ - - public: - - max_pool_ ( - ); - /*! - ensures - - #nr() == _nr - - #nc() == _nc - - #stride_y() == _stride_y - - #stride_x() == _stride_x - - #padding_y() == _padding_y - - #padding_x() == _padding_x - !*/ - - long nr( - ) const; - /*! - ensures - - returns the number of rows in the pooling window or 0 if the window size - is "the entire input tensor". - !*/ - - long nc( - ) const; - /*! - ensures - - returns the number of columns in the pooling window or 0 if the window size - is "the entire input tensor". - !*/ - - long stride_y( - ) const; - /*! 
- ensures - - returns the vertical stride used when scanning the max pooling window - over an image. That is, each window will be moved stride_y() pixels down - at a time when it moves over the image. - !*/ - - long stride_x( - ) const; - /*! - ensures - - returns the horizontal stride used when scanning the max pooling window - over an image. That is, each window will be moved stride_x() pixels right - at a time when it moves over the image. - !*/ - - long padding_y( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the top and bottom - sides of the image. - !*/ - - long padding_x( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the left and right - sides of the image. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. - !*/ - }; - - template < - long nr, - long nc, - int stride_y, - int stride_x, - typename SUBNET - > - using max_pool = add_layer<max_pool_<nr,nc,stride_y,stride_x>, SUBNET>; - - template < - typename SUBNET - > - using max_pool_everything = add_layer<max_pool_<0,0,1,1>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - template < - long _nr, - long _nc, - int _stride_y, - int _stride_x, - int _padding_y = _stride_y!=1? 0 : _nr/2, - int _padding_x = _stride_x!=1? 0 : _nc/2 - > - class avg_pool_ - { - /*! 
- REQUIREMENTS ON TEMPLATE ARGUMENTS - - _nr >= 0 - - _nc >= 0 - - _stride_y > 0 - - _stride_x > 0 - - _padding_y >= 0 - - _padding_x >= 0 - - if (_nr != 0) then - - _padding_y < _nr - - else - - _padding_y == 0 - - if (_nc != 0) then - - _padding_x < _nc - - else - - _padding_x == 0 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines an average pooling layer that - takes an input tensor and downsamples it. It does this by sliding a window - over the images in an input tensor and outputting, for each channel, the - average element within the window. - - If _nr == 0 then it means the filter size covers all the rows in the input - tensor, similarly for the _nc parameter. To be precise, if we call the - input tensor IN and the output tensor OUT, then OUT is defined as follows: - - let FILT_NR == (nr()==0) ? IN.nr() : nr() - - let FILT_NC == (nc()==0) ? IN.nc() : nc() - - OUT.num_samples() == IN.num_samples() - - OUT.k() == IN.k() - - OUT.nr() == 1+(IN.nr() + 2*padding_y() - FILT_NR)/stride_y() - - OUT.nc() == 1+(IN.nc() + 2*padding_x() - FILT_NC)/stride_x() - - for all valid s, k, r, and c: - - image_plane(OUT,s,k)(r,c) == mean(subm_clipped(image_plane(IN,s,k), - centered_rect(c*stride_x() + FILT_NC/2 - padding_x(), - r*stride_y() + FILT_NR/2 - padding_y(), - FILT_NC, - FILT_NR))) - !*/ - - public: - - avg_pool_ ( - ); - /*! - ensures - - #nr() == _nr - - #nc() == _nc - - #stride_y() == _stride_y - - #stride_x() == _stride_x - - #padding_y() == _padding_y - - #padding_x() == _padding_x - !*/ - - long nr( - ) const; - /*! - ensures - - returns the number of rows in the pooling window or 0 if the window size - is "the entire input tensor". - !*/ - - long nc( - ) const; - /*! - ensures - - returns the number of columns in the pooling window or 0 if the window size - is "the entire input tensor". - !*/ - - long stride_y( - ) const; - /*! 
- ensures - - returns the vertical stride used when scanning the pooling window - over an image. That is, each window will be moved stride_y() pixels down - at a time when it moves over the image. - !*/ - - long stride_x( - ) const; - /*! - ensures - - returns the horizontal stride used when scanning the pooling window - over an image. That is, each window will be moved stride_x() pixels right - at a time when it moves over the image. - !*/ - - long padding_y( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the top and bottom - sides of the image. - !*/ - - long padding_x( - ) const; - /*! - ensures - - returns the number of pixels of zero padding added to the left and right - sides of the image. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. - !*/ - - }; - - template < - long nr, - long nc, - int stride_y, - int stride_x, - typename SUBNET - > - using avg_pool = add_layer<avg_pool_<nr,nc,stride_y,stride_x>, SUBNET>; - - template < - typename SUBNET - > - using avg_pool_everything = add_layer<avg_pool_<0,0,1,1>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class relu_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a rectified linear layer. 
- Therefore, it passes its inputs through the function - f(x)=max(x,0) - where f() is applied pointwise across the input tensor. - !*/ - - public: - - relu_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. - !*/ - }; - - template <typename SUBNET> - using relu = add_layer<relu_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class prelu_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a parametric rectified linear - layer. Therefore, it passes its inputs through the function - f(x) = x>0 ? x : p*x - where f() is applied pointwise across the input tensor and p is a scalar - parameter learned by this layer. - - - This is the layer type introduced in the paper: - He, Kaiming, et al. "Delving deep into rectifiers: Surpassing - human-level performance on imagenet classification." Proceedings of the - IEEE International Conference on Computer Vision. 2015. - !*/ - - public: - - explicit prelu_( - float initial_param_value = 0.25 - ); - /*! - ensures - - The p parameter will be initialized with initial_param_value. - - #get_initial_param_value() == initial_param_value. - !*/ - - float get_initial_param_value ( - ) const; - /*! - ensures - - returns the initial value of the prelu parameter. 
- !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - - template <typename SUBNET> - using prelu = add_layer<prelu_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class sig_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a sigmoid layer. Therefore, it - passes its inputs through the function - f(x)=1/(1+exp(-x)) - where f() is applied pointwise across the input tensor. - !*/ - - public: - - sig_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. - !*/ - }; - - template <typename SUBNET> - using sig = add_layer<sig_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class htan_ - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a hyperbolic tangent layer. - Therefore, it passes its inputs through the function - f(x)=std::tanh(x) - where f() is applied pointwise across the input tensor. - !*/ - - public: - - htan_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. - !*/ - }; - - template <typename SUBNET> - using htan = add_layer<htan_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class softmax_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a softmax layer. To be precise, - we define the softmax function s(x) as: - s(x) == exp(x)/sum(exp(x)) - where x is a vector. Then this layer treats its input tensor as a - collection of multi-channel images and applies s() to each spatial location - in each image. In each application, the tensor::k() channel elements at - each position are input to s() and then replaced by the outputs of s(). - - This means that, for example, if you collapsed each output image to a 1 - channel image by adding the channels then you would end up with images - where each pixel value was 1. This is because the sum of the outputs of - s() will always be equal to 1. 
- !*/ - - public: - - softmax_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. - !*/ - }; - - template <typename SUBNET> - using softmax = add_layer<softmax_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - class softmax_all_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, it defines a softmax layer. To be precise, - we define the softmax function s(x) as: - s(x) == exp(x)/sum(exp(x)) - where x is a vector. Then this layer treats its input tensor as a - collection of tensor::num_samples() vectors and applies s() to each vector - in the tensor. Therefore, there are logically tensor::num_samples() - invocations of s(). - !*/ - - public: - - softmax_all_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ - interface. Note that this layer doesn't have any parameters, so the tensor - returned by get_layer_params() is always empty. 
- !*/ - }; - - template <typename SUBNET> - using softmax_all = add_layer<softmax_all_, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - template < - template<typename> class tag - > - class add_prev_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. This layer simply adds the output of two previous layers. - In particular, it adds the tensor from its immediate predecessor layer, - sub.get_output(), with the tensor from a deeper layer, - layer<tag>(sub).get_output(). - - Therefore, you supply a tag via add_prev_'s template argument that tells it - what layer to add to the output of the previous layer. The result of this - addition is output by add_prev_. Finally, the addition happens pointwise - according to 4D tensor arithmetic. If the dimensions don't match then - missing elements are presumed to be equal to 0. Moreover, each dimension - of the output tensor is equal to the maximum dimension of either of the - inputs. That is, if the tensors A and B are being added to produce C then: - - C.num_samples() == max(A.num_samples(), B.num_samples()) - - C.k() == max(A.k(), B.k()) - - C.nr() == max(A.nr(), B.nr()) - - C.nc() == max(A.nc(), B.nc()) - !*/ - - public: - add_prev_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. 
- !*/ - }; - - - template < - template<typename> class tag, - typename SUBNET - > - using add_prev = add_layer<add_prev_<tag>, SUBNET>; - - // Here we add some convenient aliases for using add_prev_ with the tag layers. - template <typename SUBNET> using add_prev1 = add_prev<tag1, SUBNET>; - template <typename SUBNET> using add_prev2 = add_prev<tag2, SUBNET>; - template <typename SUBNET> using add_prev3 = add_prev<tag3, SUBNET>; - template <typename SUBNET> using add_prev4 = add_prev<tag4, SUBNET>; - template <typename SUBNET> using add_prev5 = add_prev<tag5, SUBNET>; - template <typename SUBNET> using add_prev6 = add_prev<tag6, SUBNET>; - template <typename SUBNET> using add_prev7 = add_prev<tag7, SUBNET>; - template <typename SUBNET> using add_prev8 = add_prev<tag8, SUBNET>; - template <typename SUBNET> using add_prev9 = add_prev<tag9, SUBNET>; - template <typename SUBNET> using add_prev10 = add_prev<tag10, SUBNET>; - using add_prev1_ = add_prev_<tag1>; - using add_prev2_ = add_prev_<tag2>; - using add_prev3_ = add_prev_<tag3>; - using add_prev4_ = add_prev_<tag4>; - using add_prev5_ = add_prev_<tag5>; - using add_prev6_ = add_prev_<tag6>; - using add_prev7_ = add_prev_<tag7>; - using add_prev8_ = add_prev_<tag8>; - using add_prev9_ = add_prev_<tag9>; - using add_prev10_ = add_prev_<tag10>; - -// ---------------------------------------------------------------------------------------- - - template < - template<typename> class tag - > - class mult_prev_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. This layer simply multiplies the output of two previous - layers. In particular, it multiplies the tensor from its immediate - predecessor layer, sub.get_output(), with the tensor from a deeper layer, - layer<tag>(sub).get_output(). - - Therefore, you supply a tag via mult_prev_'s template argument that tells - it what layer to multiply with the output of the previous layer. 
The - result of this multiplication is output by mult_prev_. Finally, the - multiplication happens pointwise according to 4D tensor arithmetic. If the - dimensions don't match then missing elements are presumed to be equal to 0. - Moreover, each dimension of the output tensor is equal to the maximum - dimension of either of the inputs. That is, if the tensors A and B are - being multiplied to produce C then: - - C.num_samples() == max(A.num_samples(), B.num_samples()) - - C.k() == max(A.k(), B.k()) - - C.nr() == max(A.nr(), B.nr()) - - C.nc() == max(A.nc(), B.nc()) - !*/ - - public: - mult_prev_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - - - template < - template<typename> class tag, - typename SUBNET - > - using mult_prev = add_layer<mult_prev_<tag>, SUBNET>; - - // Here we add some convenient aliases for using mult_prev_ with the tag layers. 
- template <typename SUBNET> using mult_prev1 = mult_prev<tag1, SUBNET>; - template <typename SUBNET> using mult_prev2 = mult_prev<tag2, SUBNET>; - template <typename SUBNET> using mult_prev3 = mult_prev<tag3, SUBNET>; - template <typename SUBNET> using mult_prev4 = mult_prev<tag4, SUBNET>; - template <typename SUBNET> using mult_prev5 = mult_prev<tag5, SUBNET>; - template <typename SUBNET> using mult_prev6 = mult_prev<tag6, SUBNET>; - template <typename SUBNET> using mult_prev7 = mult_prev<tag7, SUBNET>; - template <typename SUBNET> using mult_prev8 = mult_prev<tag8, SUBNET>; - template <typename SUBNET> using mult_prev9 = mult_prev<tag9, SUBNET>; - template <typename SUBNET> using mult_prev10 = mult_prev<tag10, SUBNET>; - using mult_prev1_ = mult_prev_<tag1>; - using mult_prev2_ = mult_prev_<tag2>; - using mult_prev3_ = mult_prev_<tag3>; - using mult_prev4_ = mult_prev_<tag4>; - using mult_prev5_ = mult_prev_<tag5>; - using mult_prev6_ = mult_prev_<tag6>; - using mult_prev7_ = mult_prev_<tag7>; - using mult_prev8_ = mult_prev_<tag8>; - using mult_prev9_ = mult_prev_<tag9>; - using mult_prev10_ = mult_prev_<tag10>; - -// ---------------------------------------------------------------------------------------- - - template < - template<typename> class tag - > - class scale_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. This layer scales the output channels of the tagged layer - by multiplying it with the output of the previous layer. To be specific: - - Let INPUT == layer<tag>(sub).get_output() - - Let SCALES == sub.get_output() - - This layer takes INPUT and SCALES as input. - - The output of this layer has the same dimensions as INPUT. - - This layer requires: - - SCALES.num_samples() == INPUT.num_samples() - - SCALES.k() == INPUT.k() - - SCALES.nr() == 1 - - SCALES.nc() == 1 - - The output tensor is produced by pointwise multiplying SCALES with - INPUT at each spatial location. 
Therefore, if OUT is the output of - this layer then we would have: - OUT(n,k,r,c) == INPUT(n,k,r,c)*SCALES(n,k) - !*/ - - public: - scale_( - ); - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - - - template < - template<typename> class tag, - typename SUBNET - > - using scale = add_layer<scale_<tag>, SUBNET>; - - // Here we add some convenient aliases for using scale_ with the tag layers. - template <typename SUBNET> using scale1 = scale<tag1, SUBNET>; - template <typename SUBNET> using scale2 = scale<tag2, SUBNET>; - template <typename SUBNET> using scale3 = scale<tag3, SUBNET>; - template <typename SUBNET> using scale4 = scale<tag4, SUBNET>; - template <typename SUBNET> using scale5 = scale<tag5, SUBNET>; - template <typename SUBNET> using scale6 = scale<tag6, SUBNET>; - template <typename SUBNET> using scale7 = scale<tag7, SUBNET>; - template <typename SUBNET> using scale8 = scale<tag8, SUBNET>; - template <typename SUBNET> using scale9 = scale<tag9, SUBNET>; - template <typename SUBNET> using scale10 = scale<tag10, SUBNET>; - using scale1_ = scale_<tag1>; - using scale2_ = scale_<tag2>; - using scale3_ = scale_<tag3>; - using scale4_ = scale_<tag4>; - using scale5_ = scale_<tag5>; - using scale6_ = scale_<tag6>; - using scale7_ = scale_<tag7>; - using scale8_ = scale_<tag8>; - using scale9_ = scale_<tag9>; - using scale10_ = scale_<tag10>; - -// ---------------------------------------------------------------------------------------- - - template< - template<typename> class... TAG_TYPES - > - class concat_ - { - /*! 
- WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. This layer simply concatenates the outputs of the tagged layers. - Importantly, each input layer must have the same dimensions (i.e. - num_samples, nr, and nc) except for the k channel, which may vary. This is - because the concatenation happens along the k dimension. That is, the - output of this layer is a tensor, OUT, that is the concatenation of the - tensors: - for each (tag in TAG_TYPES) - layer<tag>(subnet).get_output() - Therefore, OUT.num_samples(), OUT.nr(), and OUT.nc() match the dimensions - of the input tensors while OUT.k() is the sum of the input layers' k() - dimensions. - !*/ - - public: - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - dpoint map_input_to_output(dpoint p) const; - dpoint map_output_to_input(dpoint p) const; - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. 
- !*/ - }; - - - // concat layer definitions - template <template<typename> class TAG1, - template<typename> class TAG2, - typename SUBNET> - using concat2 = add_layer<concat_<TAG1, TAG2>, SUBNET>; - - template <template<typename> class TAG1, - template<typename> class TAG2, - template<typename> class TAG3, - typename SUBNET> - using concat3 = add_layer<concat_<TAG1, TAG2, TAG3>, SUBNET>; - - template <template<typename> class TAG1, - template<typename> class TAG2, - template<typename> class TAG3, - template<typename> class TAG4, - typename SUBNET> - using concat4 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4>, SUBNET>; - - template <template<typename> class TAG1, - template<typename> class TAG2, - template<typename> class TAG3, - template<typename> class TAG4, - template<typename> class TAG5, - typename SUBNET> - using concat5 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4, TAG5>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - - /*!A inception layer definitions !*/ - - // Now define inception layer tag types. These layer aliases allow creating - // the networks described in the paper: - // Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of - // the IEEE Conference on Computer Vision and Pattern Recognition. 2015. - // See the dnn_inception_ex.cpp example for a complete example of their use. Note also - // that we use tag ID numbers >= 1000 to avoid conflict with user's tag layers. 
- template <typename SUBNET> using itag0 = add_tag_layer< 1000 + 0, SUBNET>; - template <typename SUBNET> using itag1 = add_tag_layer< 1000 + 1, SUBNET>; - template <typename SUBNET> using itag2 = add_tag_layer< 1000 + 2, SUBNET>; - template <typename SUBNET> using itag3 = add_tag_layer< 1000 + 3, SUBNET>; - template <typename SUBNET> using itag4 = add_tag_layer< 1000 + 4, SUBNET>; - template <typename SUBNET> using itag5 = add_tag_layer< 1000 + 5, SUBNET>; - // skip to inception input - template <typename SUBNET> using iskip = add_skip_layer< itag0, SUBNET>; - - // here are some templates to be used for creating inception layer groups - template <template<typename>class B1, - template<typename>class B2, - typename SUBNET> - using inception2 = concat2<itag1, itag2, itag1<B1<iskip< itag2<B2< itag0<SUBNET>>>>>>>; - - template <template<typename>class B1, - template<typename>class B2, - template<typename>class B3, - typename SUBNET> - using inception3 = concat3<itag1, itag2, itag3, itag1<B1<iskip< itag2<B2<iskip< itag3<B3< itag0<SUBNET>>>>>>>>>>; - - template <template<typename>class B1, - template<typename>class B2, - template<typename>class B3, - template<typename>class B4, - typename SUBNET> - using inception4 = concat4<itag1, itag2, itag3, itag4, - itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4< itag0<SUBNET>>>>>>>>>>>>>; - - template <template<typename>class B1, - template<typename>class B2, - template<typename>class B3, - template<typename>class B4, - template<typename>class B5, - typename SUBNET> - using inception5 = concat5<itag1, itag2, itag3, itag4, itag5, - itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4<iskip< itag5<B5< itag0<SUBNET>>>>>>>>>>>>>>>>; - -// ---------------------------------------------------------------------------------------- - - const double DEFAULT_L2_NORM_EPS = 1e-5; - - class l2normalize_ - { - /*! - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. 
It takes tensors as input and L2 normalizes them. In particular, - it has the following properties: - - The output tensors from this layer have the same dimensions as the - input tensors. - - If you think of each input tensor as a set of tensor::num_samples() - vectors, then the output tensor contains the same vectors except they - have been length normalized so that their L2 norms are all 1. I.e. - for each vector v we will have ||v||==1. - !*/ - - public: - - explicit l2normalize_( - double eps = DEFAULT_L2_NORM_EPS - ); - /*! - requires - - eps > 0 - ensures - - #get_eps() == eps - !*/ - - double get_eps( - ) const; - /*! - ensures - - When we normalize a vector we divide it by its L2 norm. However, the - get_eps() value is added to the squared norm prior to division to avoid - ever dividing by zero. - !*/ - - template <typename SUBNET> void setup (const SUBNET& sub); - void forward_inplace(const tensor& input, tensor& output); - void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - long _offset, - long _k, - long _nr, - long _nc - > - class extract_ - { - /*! - REQUIREMENTS ON TEMPLATE ARGUMENTS - - 0 <= _offset - - 0 < _k - - 0 < _nr - - 0 < _nc - - WHAT THIS OBJECT REPRESENTS - This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface - defined above. In particular, the output of this layer is simply a copy of - the input tensor. However, you can configure the extract layer to output - only some subset of the input tensor and also to reshape it. 
Therefore, - the dimensions of the tensor output by this layer are as follows (letting - IN be the input tensor and OUT the output tensor): - - OUT.num_samples() == IN.num_samples() - - OUT.k() == _k - - OUT.nr() == _nr - - OUT.nc() == _nc - - So the output will always have the same number of samples as the input, but - within each sample (the k,nr,nc part) we will copy only a subset of the - values. Moreover, the _offset parameter controls which part of each sample - we take. To be very precise, we will have: - - let IN_SIZE = IN.k()*IN.nr()*IN.nc() - - let OUT_SIZE = _k*_nr*_nc - - for i in range[0,IN.num_samples()) and j in range[0,OUT_SIZE): - - OUT.host()[i*OUT_SIZE+j] == IN.host()[i*IN_SIZE+_offset+j] - - - Finally, all this means that the input tensor to this layer must have a big - enough size to accommodate taking a _k*_nr*_nc slice from each of its - samples. - !*/ - - public: - - template <typename SUBNET> void setup (const SUBNET& sub); - template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output); - template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad); - const tensor& get_layer_params() const; - tensor& get_layer_params(); - /*! - These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface. - !*/ - }; - - template < - long offset, - long k, - long nr, - long nc, - typename SUBNET - > - using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>; - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DNn_LAYERS_ABSTRACT_H_ - |