Diffstat (limited to '')
-rw-r--r--  ml/dlib/dlib/dnn/layers_abstract.h  2631
1 file changed, 2631 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/dnn/layers_abstract.h b/ml/dlib/dlib/dnn/layers_abstract.h
new file mode 100644
index 000000000..f07025ff8
--- /dev/null
+++ b/ml/dlib/dlib/dnn/layers_abstract.h
@@ -0,0 +1,2631 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License.  See LICENSE.txt for the full license.
+#undef DLIB_DNn_LAYERS_ABSTRACT_H_
+#ifdef DLIB_DNn_LAYERS_ABSTRACT_H_
+
+#include "tensor_abstract.h"
+#include "core_abstract.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ class SUBNET
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object represents a deep neural network. In particular, it is
+ the simplified interface through which layer objects interact with their
+            subnetworks.  A layer's two important tasks are to (1) take outputs from its
+            subnetwork and forward propagate them through itself and (2) backward
+            propagate an error gradient through itself and onto its subnetwork.
+ The idea of a subnetwork is illustrated in the following diagram:
+
+ +---------------------------------------------------------+
+ | loss <-- layer1 <-- layer2 <-- ... <-- layern <-- input |
+ +---------------------------------------------------------+
+ ^ ^
+ \__ subnetwork for layer1 __/
+
+ Therefore, by "subnetwork" we mean the part of the network closer to the
+ input.
+
+ Note that there is no dlib::SUBNET type. It is shown here purely to
+ document the interface layer objects expect to see when they interact
+ with a network.
+ !*/
+
+ public:
+ // You aren't allowed to copy subnetworks from inside a layer.
+ SUBNET(const SUBNET&) = delete;
+ SUBNET& operator=(const SUBNET&) = delete;
+
+ const tensor& get_output(
+ ) const;
+ /*!
+ ensures
+ - returns the output of this subnetwork. This is the data that the next
+ layer in the network will take as input.
+ - have_same_dimensions(#get_gradient_input(), get_output()) == true
+ !*/
+
+ tensor& get_gradient_input(
+ );
+ /*!
+ ensures
+ - returns the error gradient for this subnetwork. That is, this is the
+ error gradient that this network will use to update itself. Therefore,
+ when performing back propagation, layers that sit on top of this
+ subnetwork write their back propagated error gradients into
+ get_gradient_input(). Or to put it another way, during back propagation,
+ layers take the contents of their get_gradient_input() and back propagate
+ it through themselves and store the results into their subnetwork's
+ get_gradient_input().
+ !*/
+
+ const NEXT_SUBNET& subnet(
+ ) const;
+ /*!
+ ensures
+ - returns the subnetwork of *this network. With respect to the diagram
+ above, if *this was layer1 then subnet() would return the network that
+ begins with layer2.
+ !*/
+
+ NEXT_SUBNET& subnet(
+ );
+ /*!
+ ensures
+ - returns the subnetwork of *this network. With respect to the diagram
+ above, if *this was layer1 then subnet() would return the network that
+ begins with layer2.
+ !*/
+
+ const layer_details_type& layer_details(
+ ) const;
+ /*!
+ ensures
+ - returns the layer_details_type instance that defines the behavior of the
+                  layer at the top of this network.  That is, it returns the layer details
+                  that define the behavior of the layer nearest the network's output rather
+ than the input layer. For computational layers, this is the object
+ implementing the EXAMPLE_COMPUTATIONAL_LAYER_ interface that defines the
+ layer's behavior.
+ !*/
+
+ unsigned int sample_expansion_factor (
+ ) const;
+ /*!
+ ensures
+                - When to_tensor() is invoked on this network's input layer it converts N
+                  input objects into M samples, all stored inside a resizable_tensor.  It
+                  is always the case that M == I*N for some integer I, and this multiplier
+                  I is what sample_expansion_factor() returns.
+
+ It should be noted that computational layers likely do not care about the
+ sample expansion factor. It is only really of concern inside a loss
+ layer where you need to know its value so that tensor samples can be
+ matched against truth objects. Moreover, in most cases the sample
+ expansion factor is 1.
+ !*/
+
+ };
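+
+    // For example, a layer's forward() typically reads sub.get_output(), while its
+    // backward() accumulates (never assigns) into sub.get_gradient_input().  A
+    // minimal sketch of that accumulation, where grad_wrt_input is a hypothetical
+    // tensor holding the gradient the layer just computed with respect to its input:
+    //
+    //    // sub.get_gradient_input() = 1*sub.get_gradient_input() + 1*grad_wrt_input
+    //    tt::add(1, sub.get_gradient_input(), 1, grad_wrt_input);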
+
+// ----------------------------------------------------------------------------------------
+
+ class EXAMPLE_COMPUTATIONAL_LAYER_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ Each computational layer in a deep neural network can be thought of as a
+ function, f(data,parameters), that takes in a data tensor, some parameters,
+ and produces an output tensor. You create an entire deep network by
+ composing these functions. Importantly, you are able to use a wide range
+ of different functions to accommodate the task you are trying to
+ accomplish. Therefore, dlib includes a number of common layer types but if
+ you want to define your own then you simply implement a class with the same
+ interface as EXAMPLE_COMPUTATIONAL_LAYER_.
+
+ Note that there is no dlib::EXAMPLE_COMPUTATIONAL_LAYER_ type. It is shown
+ here purely to document the interface that a layer object must implement.
+
+ The central work of defining a layer is implementing the forward and backward
+ methods. When you do this you have four options:
+ - Implement the forward() and backward() methods according to the
+ specification shown below. Do not implement forward_inplace() and
+ backward_inplace().
+ - Implement the forward() and backward() methods according to the
+ specification shown below, except exclude the computed_output
+ parameter from backward(). Doing this will allow dlib to make some
+ layers execute in-place and therefore run a little faster and use
+ less memory. Do not implement forward_inplace() and
+ backward_inplace().
+ - Implement the forward_inplace() and backward_inplace() methods
+ according to the specification shown below. Do not implement
+ forward() and backward(). These in-place methods allow some types of
+ layers to be implemented more efficiently.
+ - Implement the forward_inplace() and backward_inplace() methods
+ according to the specification shown below, except exclude the
+ computed_output parameter from backward_inplace(). Doing this will
+ allow dlib to make some layers execute in-place and therefore run a
+ little faster and use less memory. Do not implement forward() and
+ backward().
+
+
+ It should also be noted that layers may define additional layer specific
+ fields and the solvers can use these fields as they see fit. For example,
+ some layers define get_learning_rate_multiplier() and
+ get_weight_decay_multiplier() methods. The solvers that come with dlib
+ look at these methods, if they exist, and adjust the learning rate or
+ weight decay for that layer according to the multiplier. Therefore, you
+ can add these methods to your layer types if you want, or even define new
+ fields and new solvers that use those fields in some way.
+ !*/
+
+ public:
+
+ EXAMPLE_COMPUTATIONAL_LAYER_(
+ );
+ /*!
+ ensures
+                - Default constructs this object.  This function is not required to do
+                  anything in particular, but it must exist; that is, layer objects are
+                  required to be default constructible.
+ !*/
+
+ EXAMPLE_COMPUTATIONAL_LAYER_ (
+ const EXAMPLE_COMPUTATIONAL_LAYER_& item
+ );
+ /*!
+ ensures
+                - EXAMPLE_COMPUTATIONAL_LAYER_ objects are copy constructible
+ !*/
+
+ EXAMPLE_COMPUTATIONAL_LAYER_(
+ const some_other_layer_type& item
+ );
+ /*!
+ ensures
+ - Constructs this object from item. This form of constructor is optional
+ but it allows you to provide a conversion from one layer type to another.
+ For example, the following code is valid only if my_layer2 can be
+ constructed from my_layer1:
+ relu<fc<my_layer1<fc<input<matrix<float>>>>>> my_dnn1;
+ relu<fc<my_layer2<fc<input<matrix<float>>>>>> my_dnn2(my_dnn1);
+ This kind of pattern is useful if you want to use one type of layer
+ during training but a different type of layer during testing since it
+ allows you to easily convert between related deep neural network types.
+
+ Additionally, if you provide a constructor to build a layer from another
+ layer type you should also write your layer's deserialize() routine such
+ that it can read that other layer's serialized data in addition to your
+ own serialized data.
+ !*/
+
+ template <typename SUBNET>
+ void setup (
+ const SUBNET& sub
+ );
+ /*!
+ requires
+ - SUBNET implements the SUBNET interface defined at the top of this file.
+ ensures
+ - performs any necessary initial memory allocations and/or sets parameters
+ to their initial values prior to learning. Therefore, calling setup
+ destroys any previously learned parameters. Also, typically setup()
+ would look at the dimensions of the outputs of sub and configure the
+ number of parameters in *this accordingly.
+ !*/
+
+ template <typename SUBNET>
+ void forward(
+ const SUBNET& sub,
+ resizable_tensor& data_output
+ );
+ /*!
+ requires
+ - SUBNET implements the SUBNET interface defined at the top of this file.
+ - setup() has been called.
+ ensures
+ - Runs the output of the subnetwork through this layer and stores the
+ results into #data_output. In particular, forward() can use any of the
+ outputs in sub (e.g. sub.get_output(), sub.subnet().get_output(), etc.)
+ to compute whatever it wants.
+ !*/
+
+ template <typename SUBNET>
+ void backward(
+ const tensor& computed_output, // this parameter is optional
+ const tensor& gradient_input,
+ SUBNET& sub,
+ tensor& params_grad
+ );
+ /*!
+ requires
+ - SUBNET implements the SUBNET interface defined at the top of this file.
+ - setup() has been called.
+ - computed_output is the tensor resulting from calling forward(sub,computed_output).
+ Moreover, this was the most recent call to forward(). This means that
+ forward() is allowed to cache intermediate results so they can be used
+ during the backward computation.
+ - have_same_dimensions(gradient_input, computed_output) == true
+ - have_same_dimensions(sub.get_gradient_input(), sub.get_output()) == true
+ - have_same_dimensions(params_grad, get_layer_params()) == true
+ ensures
+ - This function outputs the gradients of this layer with respect to the
+ input data from sub and also with respect to this layer's parameters.
+ These gradients are stored into #sub and #params_grad, respectively. To be
+ precise, the gradients are taken of a function f(sub,get_layer_params())
+ which is defined thusly:
+ - Recalling that computed_output is a function of both sub and get_layer_params(),
+ since it is the result of calling forward(sub,computed_output):
+ let f(sub,get_layer_params()) == dot(computed_output, gradient_input)
+ Then we define the following gradient vectors:
+ - PARAMETER_GRADIENT == gradient of f(sub,get_layer_params()) with
+ respect to get_layer_params().
+ - for all valid I:
+ - DATA_GRADIENT_I == gradient of f(sub,get_layer_params()) with
+ respect to layer<I>(sub).get_output() (recall that forward() can
+ draw inputs from the immediate sub layer, sub.subnet(), or
+ any earlier layer. So you must consider the gradients with
+ respect to all inputs drawn from sub)
+ Finally, backward() outputs these gradients by performing:
+ - params_grad = PARAMETER_GRADIENT
+ - for all valid I:
+ - layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I
+ !*/
+
+ void forward_inplace(
+ const tensor& data_input,
+ tensor& data_output
+ );
+ /*!
+ requires
+ - have_same_dimensions(data_input,data_output) == true
+ - setup() has been called.
+ ensures
+ - Runs the data_input tensor through this layer and stores the output into
+ #data_output.
+ - This function supports in-place operation, i.e. having
+ is_same_object(data_input, data_output)==true
+ !*/
+
+ void backward_inplace(
+ const tensor& computed_output, // this parameter is optional
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor& params_grad
+ );
+ /*!
+ requires
+ - setup() has been called.
+ - computed_output is the tensor resulting from the most recent call to
+ forward_inplace(). This means that forward_inplace() is allowed to cache
+ intermediate results so they can be used during the backward computation.
+ - have_same_dimensions(gradient_input, data_grad) == true
+ - have_same_dimensions(gradient_input, computed_output) == true
+ - have_same_dimensions(params_grad, get_layer_params()) == true
+ ensures
+ - This function supports in-place operation, i.e. having
+ is_same_object(gradient_input, data_grad)==true
+ - This function outputs the gradients of this layer with respect to the
+ input data from a sublayer and also with respect to this layer's parameters.
+ These gradients are stored into #data_grad and #params_grad, respectively. To be
+ precise, the gradients are taken of a function f(data_input,get_layer_params())
+ which is defined thusly:
+ - Recalling that computed_output is a function of both the input to
+ forward_inplace() and get_layer_params(), since it is the result of
+ calling forward_inplace(data_input,computed_output):
+ let f(data_input,get_layer_params()) == dot(computed_output, gradient_input)
+ Then we define the following gradient vectors:
+ - PARAMETER_GRADIENT == gradient of f(data_input,get_layer_params()) with
+ respect to get_layer_params().
+ - DATA_GRADIENT == gradient of f(data_input,get_layer_params()) with respect
+ to data_input.
+ Finally, backward_inplace() outputs these gradients by performing:
+ - params_grad = PARAMETER_GRADIENT
+ - if (is_same_object(gradient_input, data_grad)) then
+ - data_grad = DATA_GRADIENT
+ - else
+ - data_grad += DATA_GRADIENT
+ !*/
+
+ const tensor& get_layer_params(
+ ) const;
+ /*!
+ ensures
+ - returns the parameters that define the behavior of forward().
+ !*/
+
+ tensor& get_layer_params(
+ );
+ /*!
+ ensures
+ - returns the parameters that define the behavior of forward().
+ !*/
+
+
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ /*!
+ These two functions are optional. If provided, they should map between
+ (column,row) coordinates in input and output tensors of forward(). Providing
+ these functions allows you to use global utility functions like
+ input_tensor_to_output_tensor().
+ !*/
+
+ void clean (
+ );
+ /*!
+ Implementing this function is optional. If you don't need it then you don't
+ have to provide a clean(). But if you do provide it then it must behave as
+ follows:
+
+ ensures
+                - calling clean() causes this object to forget about everything except its
+ parameters. This is useful if your layer caches information between
+ forward and backward passes and you want to clean out that cache
+ information before saving the network to disk.
+ !*/
+
+ };
+
+ std::ostream& operator<<(std::ostream& out, const EXAMPLE_COMPUTATIONAL_LAYER_& item);
+ /*!
+        Prints a string describing this layer.
+ !*/
+
+ void to_xml(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out);
+ /*!
+        This function is optional, but it is required if you want to print your
+        networks with net_to_xml().  When provided, to_xml() prints a layer as XML.
+ !*/
+
+ void serialize(const EXAMPLE_COMPUTATIONAL_LAYER_& item, std::ostream& out);
+ void deserialize(EXAMPLE_COMPUTATIONAL_LAYER_& item, std::istream& in);
+ /*!
+ provides serialization support
+ !*/
+
+ // For each layer you define, always define an add_layer template so that layers can be
+ // easily composed. Moreover, the convention is that the layer class ends with an _
+ // while the add_layer template has the same name but without the trailing _.
+ template <typename SUBNET>
+ using EXAMPLE_COMPUTATIONAL_LAYER = add_layer<EXAMPLE_COMPUTATIONAL_LAYER_, SUBNET>;
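+
+    // As a concrete illustration of the interface and naming convention above, here
+    // is a sketch (not part of dlib) of a minimal parameterless layer that just
+    // doubles its input.  The names my_scale_ and my_scale are hypothetical:
+    //
+    //    class my_scale_
+    //    {
+    //    public:
+    //        template <typename SUBNET>
+    //        void setup (const SUBNET& /*sub*/) {} // no parameters to allocate
+    //
+    //        template <typename SUBNET>
+    //        void forward(const SUBNET& sub, resizable_tensor& out)
+    //        {
+    //            out.copy_size(sub.get_output());
+    //            tt::affine_transform(out, sub.get_output(), 2); // f(x) = 2*x
+    //        }
+    //
+    //        template <typename SUBNET>
+    //        void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+    //        {
+    //            // df/dx == 2, so accumulate 2*gradient_input into the subnetwork,
+    //            // as required by the backward() specification above.
+    //            tt::affine_transform(sub.get_gradient_input(),
+    //                                 sub.get_gradient_input(), gradient_input, 1, 2);
+    //        }
+    //
+    //        const tensor& get_layer_params() const { return params; }
+    //        tensor& get_layer_params() { return params; }
+    //
+    //    private:
+    //        resizable_tensor params; // stays empty: this layer has no parameters
+    //    };
+    //
+    //    // serialize()/deserialize() and operator<< would be added alongside, and
+    //    // then the usual composition alias:
+    //    template <typename SUBNET>
+    //    using my_scale = add_layer<my_scale_, SUBNET>;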
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ enum fc_bias_mode
+ {
+ FC_HAS_BIAS = 0,
+ FC_NO_BIAS = 1
+ };
+
+ struct num_fc_outputs
+ {
+ num_fc_outputs(unsigned long n) : num_outputs(n) {}
+ unsigned long num_outputs;
+ };
+
+ template <
+ unsigned long num_outputs,
+ fc_bias_mode bias_mode
+ >
+ class fc_
+ {
+ /*!
+ REQUIREMENTS ON num_outputs
+ num_outputs > 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a fully connected layer that
+ takes an input tensor and multiplies it by a weight matrix and outputs the
+ results.
+
+ The dimensions of the tensors output by this layer are as follows (letting
+ IN be the input tensor and OUT the output tensor):
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == get_num_outputs()
+ - OUT.nr() == 1
+ - OUT.nc() == 1
+ !*/
+
+ public:
+
+ fc_(
+ );
+ /*!
+ ensures
+ - #get_num_outputs() == num_outputs
+ - #get_bias_mode() == bias_mode
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 1
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 0
+ !*/
+
+ fc_(
+ num_fc_outputs o
+ );
+ /*!
+ ensures
+ - #get_num_outputs() == o.num_outputs
+ - #get_bias_mode() == bias_mode
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 1
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 0
+ !*/
+
+ unsigned long get_num_outputs (
+ ) const;
+ /*!
+ ensures
+ - This layer outputs column vectors that contain get_num_outputs()
+ elements. That is, the output tensor T from forward() will be such that:
+ - T.num_samples() == however many samples were given to forward().
+ - T.k() == get_num_outputs()
+ - The rest of the dimensions of T will be 1.
+ !*/
+
+ void set_num_outputs(
+ long num
+ );
+ /*!
+ requires
+ - num > 0
+ - get_layer_params().size() == 0 || get_num_outputs() == num
+ (i.e. You can't change the number of outputs in fc_ if the parameter
+ tensor has already been allocated.)
+ ensures
+ - #get_num_outputs() == num
+ !*/
+
+ fc_bias_mode get_bias_mode (
+ ) const;
+ /*!
+ ensures
+ - returns the bias mode which determines if this layer includes bias terms.
+ That is, if the bias mode is FC_HAS_BIAS then a different constant scalar
+ is added to each of the outputs of this layer.
+ !*/
+
+ double get_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its parameters be
+ multiplied by get_learning_rate_multiplier().
+ !*/
+
+ double get_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its parameters be
+ multiplied by get_weight_decay_multiplier().
+ !*/
+
+ void set_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_learning_rate_multiplier() == val
+ !*/
+
+ void set_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_weight_decay_multiplier() == val
+ !*/
+
+ double get_bias_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its bias parameters be
+ multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier().
+ !*/
+
+ double get_bias_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its bias parameters be
+ multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier().
+ !*/
+
+ void set_bias_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_learning_rate_multiplier() == val
+ !*/
+
+ void set_bias_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_weight_decay_multiplier() == val
+ !*/
+
+ alias_tensor_const_instance get_weights(
+ ) const;
+ /*!
+ ensures
+ - returns an alias of get_layer_params(), containing the weights matrix of
+ the fully connected layer.
+                - #get_weights().num_samples() is the number of elements in an input
+                  sample, i.e. the sublayer's output's k()*nr()*nc().
+                - #get_weights().k() == #get_num_outputs()
+ - if get_bias_mode() == FC_HAS_BIAS:
+ - #get_layer_params().size() == (#get_weights().size() + #get_biases().size())
+ - else:
+ - #get_layer_params().size() == #get_weights().size()
+ !*/
+
+ alias_tensor_instance get_weights(
+ );
+ /*!
+ ensures
+ - returns an alias of get_layer_params(), containing the weights matrix of
+ the fully connected layer.
+                - #get_weights().num_samples() is the number of elements in an input
+                  sample, i.e. the sublayer's output's k()*nr()*nc().
+                - #get_weights().k() == #get_num_outputs()
+ - if get_bias_mode() == FC_HAS_BIAS:
+ - #get_layer_params().size() == (#get_weights().size() + #get_biases().size())
+ - else:
+ - #get_layer_params().size() == #get_weights().size()
+ !*/
+
+ alias_tensor_const_instance get_biases(
+ ) const;
+ /*!
+ requires
+ - #get_bias_mode() == FC_HAS_BIAS
+ ensures
+ - returns an alias of get_layer_params(), containing the bias vector of
+ the fully connected layer.
+                - #get_biases().num_samples() == 1
+                - #get_biases().k() == #get_num_outputs()
+ - #get_layer_params().size() == (#get_weights().size() + #get_biases().size())
+ !*/
+
+ alias_tensor_instance get_biases(
+ );
+ /*!
+ requires
+ - #get_bias_mode() == FC_HAS_BIAS
+ ensures
+ - returns an alias of get_layer_params(), containing the bias vector of
+ the fully connected layer.
+                - #get_biases().num_samples() == 1
+                - #get_biases().k() == #get_num_outputs()
+ - #get_layer_params().size() == (#get_weights().size() + #get_biases().size())
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+
+ };
+
+ template <
+ unsigned long num_outputs,
+ typename SUBNET
+ >
+ using fc = add_layer<fc_<num_outputs,FC_HAS_BIAS>, SUBNET>;
+
+ template <
+ unsigned long num_outputs,
+ typename SUBNET
+ >
+ using fc_no_bias = add_layer<fc_<num_outputs,FC_NO_BIAS>, SUBNET>;
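+
+    // For example, a small network using these templates might be declared as
+    // follows (a sketch; the layer sizes are arbitrary, and loss_multiclass_log and
+    // input are documented in loss_abstract.h and input_abstract.h respectively):
+    //
+    //    using net_type = loss_multiclass_log<fc<10,
+    //                         relu<fc<50,
+    //                         input<matrix<float>>>>>>;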
+
+// ----------------------------------------------------------------------------------------
+
+ struct num_con_outputs
+ {
+ num_con_outputs(unsigned long n) : num_outputs(n) {}
+ unsigned long num_outputs;
+ };
+
+ template <
+ long _num_filters,
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class con_
+ {
+ /*!
+ REQUIREMENTS ON TEMPLATE ARGUMENTS
+ - _num_filters > 0
+ - _nr >= 0
+ - _nc >= 0
+ - _stride_y > 0
+ - _stride_x > 0
+ - _padding_y >= 0
+ - _padding_x >= 0
+ - Also, we require that:
+ - if (_nr == 0) then
+ - _padding_y == 0
+ - else
+ - _padding_y < _nr
+ - if (_nc == 0) then
+ - _padding_x == 0
+ - else
+ - _padding_x < _nc
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a convolution layer that takes an
+ input tensor (nominally representing an image) and convolves it with a set
+ of filters and then outputs the results.
+
+ The dimensions of the tensors output by this layer are as follows (letting
+ IN be the input tensor and OUT the output tensor):
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == num_filters()
+ - OUT.nr() == 1+(IN.nr() + 2*padding_y() - nr())/stride_y()
+ - OUT.nc() == 1+(IN.nc() + 2*padding_x() - nc())/stride_x()
+
+ Note also that setting _nr or _nc to 0 has a special meaning of "set the
+ filter size equal to the input image size". Specifically, it means:
+ - if (_nr == 0) then
+ - nr() == IN.nr()
+ - OUT.nr() == 1
+ - if (_nc == 0) then
+ - nc() == IN.nc()
+ - OUT.nc() == 1
+ !*/
+
+ public:
+ con_(
+ );
+ /*!
+ ensures
+ - #num_filters() == _num_filters
+ - #nr() == _nr
+ - #nc() == _nc
+ - #stride_y() == _stride_y
+ - #stride_x() == _stride_x
+ - #padding_y() == _padding_y
+ - #padding_x() == _padding_x
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 1
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 0
+ !*/
+
+ con_(
+ num_con_outputs o
+ );
+ /*!
+ ensures
+ - #num_filters() == o.num_outputs
+ - #nr() == _nr
+ - #nc() == _nc
+ - #stride_y() == _stride_y
+ - #stride_x() == _stride_x
+ - #padding_y() == _padding_y
+ - #padding_x() == _padding_x
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 1
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 0
+ !*/
+
+ long num_filters(
+ ) const;
+ /*!
+ ensures
+ - returns the number of filters contained in this layer. The k dimension
+ of the output tensors produced by this layer will be equal to the number
+ of filters.
+ !*/
+
+ void set_num_filters(
+ long num
+ );
+ /*!
+ requires
+ - num > 0
+ - get_layer_params().size() == 0 || num_filters() == num
+ (i.e. You can't change the number of filters in con_ if the parameter
+ tensor has already been allocated.)
+ ensures
+ - #num_filters() == num
+ !*/
+
+ long nr(
+ ) const;
+ /*!
+ ensures
+ - returns the number of rows in the filters in this layer. Note that if
+ nr()==0 then it means the size of the filter is not yet assigned, but
+ once setup() is called nr() will be set to the input tensor's nr().
+ Therefore, nr()==0 has the special interpretation of "be the same size as
+ the input tensor".
+ !*/
+
+ long nc(
+ ) const;
+ /*!
+ ensures
+ - returns the number of columns in the filters in this layer. Note that if
+ nc()==0 then it means the size of the filter is not yet assigned, but
+ once setup() is called nc() will be set to the input tensor's nc().
+ Therefore, nc()==0 has the special interpretation of "be the same size as
+ the input tensor".
+ !*/
+
+ long stride_y(
+ ) const;
+ /*!
+ ensures
+ - returns the vertical stride used when convolving the filters over an
+ image. That is, each filter will be moved stride_y() pixels down at a
+ time when it moves over the image.
+ !*/
+
+ long stride_x(
+ ) const;
+ /*!
+ ensures
+ - returns the horizontal stride used when convolving the filters over an
+ image. That is, each filter will be moved stride_x() pixels right at a
+ time when it moves over the image.
+ !*/
+
+ long padding_y(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the top and bottom
+ sides of the image.
+ !*/
+
+ long padding_x(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the left and right
+ sides of the image.
+ !*/
+
+ double get_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its parameters be
+ multiplied by get_learning_rate_multiplier().
+ !*/
+
+ double get_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its parameters be
+ multiplied by get_weight_decay_multiplier().
+ !*/
+
+ void set_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_learning_rate_multiplier() == val
+ !*/
+
+ void set_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_weight_decay_multiplier() == val
+ !*/
+
+ double get_bias_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its bias parameters be
+ multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier().
+ !*/
+
+ double get_bias_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its bias parameters be
+ multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier().
+ !*/
+
+ void set_bias_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_learning_rate_multiplier() == val
+ !*/
+
+ void set_bias_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_weight_decay_multiplier() == val
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+
+ };
+
+ template <
+ long num_filters,
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using con = add_layer<con_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>;
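+
+    // As a worked instance of the output size formulas above, con_<16,5,5,2,2>
+    // (16 5x5 filters with stride 2, hence default padding 0) applied to a 64x64
+    // input gives:
+    //    OUT.k()  == 16
+    //    OUT.nr() == 1+(64 + 2*0 - 5)/2 == 30
+    //    OUT.nc() == 1+(64 + 2*0 - 5)/2 == 30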
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _num_filters,
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class cont_
+ {
+ /*!
+ REQUIREMENTS ON TEMPLATE ARGUMENTS
+            _num_filters, _nr, _nc, _stride_y, and _stride_x must all be > 0.
+ Also, we require that:
+ - 0 <= _padding_y && _padding_y < _nr
+ - 0 <= _padding_x && _padding_x < _nc
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a transposed convolution layer
+ that takes an input tensor and transpose convolves (sometimes called
+ "deconvolution") it with a set of filters and then outputs the results.
+
+ This is essentially a convolutional layer that allows fractional strides.
+ Therefore, you can make output tensors that are larger than the input
+ tensors using this layer type.
+
+
+ The dimensions of the tensors output by this layer are as follows (letting
+ IN be the input tensor and OUT the output tensor):
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == num_filters()
+ - OUT.nr() == stride_y()*(IN.nr()-1) + nr() - 2*padding_y()
+ - OUT.nc() == stride_x()*(IN.nc()-1) + nc() - 2*padding_x()
+ !*/
+
+ public:
+ cont_(
+ );
+ /*!
+ ensures
+ - #num_filters() == _num_filters
+ - #nr() == _nr
+ - #nc() == _nc
+ - #stride_y() == _stride_y
+ - #stride_x() == _stride_x
+ - #padding_y() == _padding_y
+ - #padding_x() == _padding_x
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 1
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 0
+ !*/
+
+ cont_(
+ num_con_outputs o
+ );
+ /*!
+ ensures
+ - #num_filters() == o.num_outputs
+ - #nr() == _nr
+ - #nc() == _nc
+ - #stride_y() == _stride_y
+ - #stride_x() == _stride_x
+ - #padding_y() == _padding_y
+ - #padding_x() == _padding_x
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 1
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 0
+ !*/
+
+ long num_filters(
+ ) const;
+ /*!
+ ensures
+ - returns the number of filters contained in this layer. The k dimension
+ of the output tensors produced by this layer will be equal to the number
+ of filters.
+ !*/
+
+ void set_num_filters(
+ long num
+ );
+ /*!
+ requires
+ - num > 0
+ - get_layer_params().size() == 0 || num_filters() == num
+ (i.e. You can't change the number of filters in cont_ if the parameter
+ tensor has already been allocated.)
+ ensures
+ - #num_filters() == num
+ !*/
+
+ long nr(
+ ) const;
+ /*!
+ ensures
+ - returns the number of rows in the filters in this layer.
+ !*/
+
+ long nc(
+ ) const;
+ /*!
+ ensures
+ - returns the number of columns in the filters in this layer.
+ !*/
+
+ long stride_y(
+ ) const;
+ /*!
+ ensures
+ - returns the vertical stride used when convolving the filters over an
+ image. That is, each filter will be moved 1.0/stride_y() pixels down at
+ a time when it moves over the image.
+ !*/
+
+ long stride_x(
+ ) const;
+ /*!
+ ensures
+ - returns the horizontal stride used when convolving the filters over an
+ image. That is, each filter will be moved 1.0/stride_x() pixels right at
+ a time when it moves over the image.
+ !*/
+
+ long padding_y(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the top and bottom
+ sides of the image.
+ !*/
+
+ long padding_x(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the left and right
+ sides of the image.
+ !*/
+
+ double get_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its parameters be
+ multiplied by get_learning_rate_multiplier().
+ !*/
+
+ double get_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its parameters be
+ multiplied by get_weight_decay_multiplier().
+ !*/
+
+ void set_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_learning_rate_multiplier() == val
+ !*/
+
+ void set_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_weight_decay_multiplier() == val
+ !*/
+
+ double get_bias_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its bias parameters be
+ multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier().
+ !*/
+
+ double get_bias_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its bias parameters be
+ multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier().
+ !*/
+
+ void set_bias_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_learning_rate_multiplier() == val
+ !*/
+
+ void set_bias_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_weight_decay_multiplier() == val
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+
+ };
+
+ template <
+ long num_filters,
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using cont = add_layer<cont_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>;
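+
+    // As a worked instance of the output size formulas above, cont_<16,2,2,2,2>
+    // (16 2x2 filters with stride 2, hence default padding 0) exactly doubles the
+    // spatial dimensions of a 32x32 input:
+    //    OUT.nr() == 2*(32-1) + 2 - 2*0 == 64
+    //    OUT.nc() == 2*(32-1) + 2 - 2*0 == 64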
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ int scale_y,
+ int scale_x
+ >
+ class upsample_
+ {
+ /*!
+ REQUIREMENTS ON TEMPLATE ARGUMENTS
+ All of them must be >= 1.
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it allows you to upsample a layer using
+ bilinear interpolation. To be very specific, it upsamples each of the
+ channels in an input tensor. Therefore, if IN is the input tensor to this
+ layer and OUT the output tensor, then we will have:
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == IN.k()
+ - OUT.nr() == IN.nr()*scale_y
+                - OUT.nc() == IN.nc()*scale_x
+ - for all valid i,k: image_plane(OUT,i,k) is a copy of
+ image_plane(IN,i,k) that has been bilinearly interpolated to fit into
+ the shape of image_plane(OUT,i,k).
+ !*/
+ public:
+
+ upsample_(
+ );
+ /*!
+ ensures
+                - This object has no state, so the constructor does nothing aside from
+                  providing default constructibility.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+ template <
+ int scale,
+ typename SUBNET
+ >
+ using upsample = add_layer<upsample_<scale,scale>, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class dropout_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a dropout layer. Therefore, it
+ passes its inputs through the stochastic function f(x) which outputs either
+ 0 or x. The probability of 0 being output is given by the drop_rate
+ argument to this object's constructor.
+
+ Note that, after you finish training a network with dropout, it is a good
+ idea to replace each dropout_ layer with a multiply_ layer because the
+ multiply_ layer is faster and deterministic.
+ !*/
+
+ public:
+
+ explicit dropout_(
+ float drop_rate = 0.5
+ );
+ /*!
+ requires
+ - 0 <= drop_rate <= 1
+ ensures
+ - #get_drop_rate() == drop_rate
+ !*/
+
+ float get_drop_rate (
+ ) const;
+ /*!
+ ensures
+ - returns the probability that an individual input value to this layer will
+ be replaced with 0.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using dropout = add_layer<dropout_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class multiply_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a basic layer that just
+            multiplies its input tensor by a constant value and returns the result.
+ It therefore has no learnable parameters.
+ !*/
+
+ public:
+ explicit multiply_(
+ float val = 0.5
+ );
+ /*!
+ ensures
+ - #get_multiply_value() == val
+ !*/
+
+ multiply_ (
+ const dropout_& item
+ );
+ /*!
+ ensures
+ - #get_multiply_value() == 1-item.get_drop_rate()
+ (i.e. We construct the multiply_ layer so that it is essentially a
+ deterministic version of the given dropout_ layer)
+ !*/
+
+ float get_multiply_value (
+ ) const;
+ /*!
+ ensures
+ - this layer simply multiplies its input tensor by get_multiply_value() and
+ produces the result as output.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using multiply = add_layer<multiply_, SUBNET>;
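+
+    // Since multiply_ is constructible from dropout_, a network trained with
+    // dropout can be converted into a deterministic test-time network via the
+    // layer conversion constructors described above.  A sketch (layer sizes are
+    // arbitrary):
+    //
+    //    using train_net = fc<10, dropout<relu<fc<50, input<matrix<float>>>>>>;
+    //    using test_net  = fc<10, multiply<relu<fc<50, input<matrix<float>>>>>>;
+    //    train_net tnet;
+    //    // ... train tnet ...
+    //    test_net net(tnet); // each dropout_ becomes multiply_(1-drop_rate)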
+
+// ----------------------------------------------------------------------------------------
+
+ enum layer_mode
+ {
+ CONV_MODE = 0, // convolutional mode
+ FC_MODE = 1 // fully connected mode
+ };
+
+ const double DEFAULT_BATCH_NORM_EPS = 0.0001;
+
+ template <
+ layer_mode mode
+ >
+ class bn_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a batch normalization layer that
+ implements the method described in the paper:
+ Batch Normalization: Accelerating Deep Network Training by Reducing
+ Internal Covariate Shift by Sergey Ioffe and Christian Szegedy
+
+ In particular, this layer produces output tensors with the same
+ dimensionality as the input tensors, except that the mean and variances of
+ the elements have been standardized to 0 and 1 respectively.
+
+ It should also be noted that when tensors with a num_samples() dimension of
+ 1 are passed to this layer it doesn't perform batch normalization.
+ Instead, it runs in "inference mode" where the learned linear normalizing
+ transformation is used to transform the tensor.
+
+ Finally, after you finish training a batch normalized network, it is a good
+ idea to replace each bn_ layer with an affine_ layer because the affine_
+ layer is faster and will never surprise you by performing batch
+ normalization on tensors that have a num_samples() dimension > 1. This allows
+ you to run large mini-batches of samples through your final network without
+ batch normalization executing at all.
+ !*/
+
+ public:
+ bn_(
+ );
+ /*!
+ ensures
+ - #get_mode() == mode
+ - #get_running_stats_window_size() == 100
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 0
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 1
+                - #get_eps() == DEFAULT_BATCH_NORM_EPS
+ !*/
+
+ explicit bn_(
+ unsigned long window_size,
+            double eps = DEFAULT_BATCH_NORM_EPS
+ );
+ /*!
+ requires
+ - eps > 0
+ - window_size > 0
+ ensures
+ - #get_mode() == mode
+ - #get_running_stats_window_size() == window_size
+ - #get_learning_rate_multiplier() == 1
+ - #get_weight_decay_multiplier() == 0
+ - #get_bias_learning_rate_multiplier() == 1
+ - #get_bias_weight_decay_multiplier() == 1
+ - #get_eps() == eps
+ !*/
+
+ layer_mode get_mode(
+ ) const;
+ /*!
+ ensures
+ - returns the mode of this layer, either CONV_MODE or FC_MODE.
+ If the mode is FC_MODE then the normalization is applied across the
+ samples in a tensor (i.e. k()*nr()*nc() different things will be
+ normalized). Otherwise, normalization is applied across everything
+ except for the k() dimension, resulting in there being only k()
+ normalization equations that are applied spatially over the tensor.
+
+ Therefore, if you are putting batch normalization after a fully connected
+ layer you should use FC_MODE. Otherwise, if you are putting batch
+ normalization after a convolutional layer you should use CONV_MODE.
+ !*/
+
+ double get_eps(
+ ) const;
+ /*!
+ ensures
+ - When doing batch normalization, we are dividing by the standard
+ deviation. This epsilon value returned by this function is added to the
+ variance to prevent the division from dividing by zero.
+ !*/
+
+ unsigned long get_running_stats_window_size (
+ ) const;
+ /*!
+ ensures
+ - Just as recommended in the batch normalization paper, this object keeps a
+ running average of the mean and standard deviations of the features.
+ These averages are used during "inference mode" so you can run a single
+ object through a batch normalized network. They are also what is used to
+ initialize an affine_ layer that is constructed from a bn_ layer. This
+ function returns the effective number of recent samples used to compute
+ the running average.
+ !*/
+
+ void set_running_stats_window_size (
+ unsigned long new_window_size
+ );
+ /*!
+ requires
+ - new_window_size > 0
+ ensures
+ - #get_running_stats_window_size() == new_window_size
+ !*/
+
+ double get_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its parameters be
+ multiplied by get_learning_rate_multiplier().
+ !*/
+
+ double get_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its parameters be
+ multiplied by get_weight_decay_multiplier().
+ !*/
+
+ void set_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_learning_rate_multiplier() == val
+ !*/
+
+ void set_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_weight_decay_multiplier() == val
+ !*/
+
+ double get_bias_learning_rate_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the learning rate used to optimize its bias parameters be
+ multiplied by get_learning_rate_multiplier()*get_bias_learning_rate_multiplier().
+ !*/
+
+ double get_bias_weight_decay_multiplier(
+ ) const;
+ /*!
+ ensures
+ - returns a multiplier number. The interpretation is that this object is
+ requesting that the weight decay used to optimize its bias parameters be
+ multiplied by get_weight_decay_multiplier()*get_bias_weight_decay_multiplier().
+ !*/
+
+ void set_bias_learning_rate_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_learning_rate_multiplier() == val
+ !*/
+
+ void set_bias_weight_decay_multiplier(
+ double val
+ );
+ /*!
+ requires
+ - val >= 0
+ ensures
+ - #get_bias_weight_decay_multiplier() == val
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using bn_con = add_layer<bn_<CONV_MODE>, SUBNET>;
+ template <typename SUBNET>
+ using bn_fc = add_layer<bn_<FC_MODE>, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename net_type>
+ void set_all_bn_running_stats_window_sizes (
+ const net_type& net,
+ unsigned long new_window_size
+ );
+ /*!
+ requires
+ - new_window_size > 0
+ - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
+ add_tag_layer.
+ ensures
+ - Sets the get_running_stats_window_size() field of all bn_ layers in net to
+ new_window_size.
+ !*/
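+
+    // A sketch of typical use, where net_type is any network type containing bn_
+    // layers:
+    //
+    //    net_type net;
+    //    set_all_bn_running_stats_window_sizes(net, 1000);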
+
+// ----------------------------------------------------------------------------------------
+
+ class affine_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it applies a simple pointwise linear
+ transformation to an input tensor. You can think of it as having two
+ parameter tensors, A and B. If the input tensor is called INPUT then the
+ output of this layer is:
+ A*INPUT+B
+ where all operations are performed element wise and each sample in the
+ INPUT tensor is processed separately.
+
+            Moreover, this object has two modes that affect the dimensionalities of A
+ and B and how they are applied to compute A*INPUT+B. If
+ get_mode()==FC_MODE then A and B each have the same dimensionality as the
+ input tensor, except their num_samples() dimensions are 1. If
+ get_mode()==CONV_MODE then A and B have all their dimensions set to 1
+ except for k(), which is equal to INPUT.k().
+
+ In either case, the computation of A*INPUT+B is performed pointwise over all
+ the elements of INPUT using either:
+ OUTPUT(n,k,r,c) == A(1,k,r,c)*INPUT(n,k,r,c)+B(1,k,r,c)
+ or
+ OUTPUT(n,k,r,c) == A(1,k,1,1)*INPUT(n,k,r,c)+B(1,k,1,1)
+ as appropriate.
+
+
+ Finally, note that the parameters of this layer are not learnable and
+ therefore not modified during network updates. Instead, the layer will
+ perform the identity transformation unless it is initialized with a bn_
+ layer, in which case it will perform whatever transformation the bn_ layer
+ has learned.
+ !*/
+
+ public:
+
+ affine_(
+ );
+ /*!
+ ensures
+ - #get_mode() == FC_MODE
+ !*/
+
+ affine_(
+ layer_mode mode
+ );
+ /*!
+ ensures
+ - #get_mode() == mode
+ !*/
+
+ template <
+ layer_mode mode
+ >
+ affine_(
+ const bn_<mode>& layer
+ );
+ /*!
+ ensures
+ - Constructs affine_ so that it performs the same transformation as the
+ supplied batch normalization layer. You would want to do this after you
+ finish training a network with bn_ layers because the affine_ layer will
+ execute faster.
+ - #get_mode() == layer.get_mode()
+ !*/
+
+ layer_mode get_mode(
+ ) const;
+ /*!
+ ensures
+ - returns the mode of this layer, either CONV_MODE or FC_MODE.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the
+ EXAMPLE_COMPUTATIONAL_LAYER_ interface. Also note that get_layer_params()
+ always returns an empty tensor since there are no learnable parameters in this
+ object.
+ !*/
+
+ };
+
+ template <typename SUBNET>
+ using affine = add_layer<affine_, SUBNET>;
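+
+    // Mirroring the dropout_ to multiply_ conversion above, a batch normalized
+    // training network can be converted into a faster test-time network by
+    // replacing each bn_ layer with an affine_ layer.  A sketch (layer sizes are
+    // arbitrary):
+    //
+    //    using train_net = fc<10, relu<bn_fc<fc<50, input<matrix<float>>>>>>;
+    //    using test_net  = fc<10, relu<affine<fc<50, input<matrix<float>>>>>>;
+    //    train_net tnet;
+    //    // ... train tnet ...
+    //    test_net net(tnet); // each bn_ becomes an equivalent affine_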
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class max_pool_
+ {
+ /*!
+ REQUIREMENTS ON TEMPLATE ARGUMENTS
+ - _nr >= 0
+ - _nc >= 0
+ - _stride_y > 0
+ - _stride_x > 0
+ - _padding_y >= 0
+ - _padding_x >= 0
+ - if (_nr != 0) then
+ - _padding_y < _nr
+ - else
+ - _padding_y == 0
+ - if (_nc != 0) then
+                - _padding_x < _nc
+ - else
+ - _padding_x == 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a max pooling layer that takes an
+ input tensor and downsamples it. It does this by sliding a window over the
+ images in an input tensor and outputting, for each channel, the maximum
+ element within the window.
+
+ If _nr == 0 then it means the filter size covers all the rows in the input
+ tensor, similarly for the _nc parameter. To be precise, if we call the
+ input tensor IN and the output tensor OUT, then OUT is defined as follows:
+ - let FILT_NR == (nr()==0) ? IN.nr() : nr()
+ - let FILT_NC == (nc()==0) ? IN.nc() : nc()
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == IN.k()
+ - OUT.nr() == 1+(IN.nr() + 2*padding_y() - FILT_NR)/stride_y()
+ - OUT.nc() == 1+(IN.nc() + 2*padding_x() - FILT_NC)/stride_x()
+ - for all valid s, k, r, and c:
+ - image_plane(OUT,s,k)(r,c) == max(subm_clipped(image_plane(IN,s,k),
+                                                     centered_rect(c*stride_x() + FILT_NC/2 - padding_x(),
+                                                                   r*stride_y() + FILT_NR/2 - padding_y(),
+ FILT_NC,
+ FILT_NR)))
+ !*/
+
+ public:
+
+ max_pool_ (
+ );
+ /*!
+ ensures
+ - #nr() == _nr
+ - #nc() == _nc
+ - #stride_y() == _stride_y
+ - #stride_x() == _stride_x
+ - #padding_y() == _padding_y
+ - #padding_x() == _padding_x
+ !*/
+
+ long nr(
+ ) const;
+ /*!
+ ensures
+ - returns the number of rows in the pooling window or 0 if the window size
+ is "the entire input tensor".
+ !*/
+
+ long nc(
+ ) const;
+ /*!
+ ensures
+                - returns the number of columns in the pooling window or 0 if the window size
+ is "the entire input tensor".
+ !*/
+
+ long stride_y(
+ ) const;
+ /*!
+ ensures
+ - returns the vertical stride used when scanning the max pooling window
+ over an image. That is, each window will be moved stride_y() pixels down
+ at a time when it moves over the image.
+ !*/
+
+ long stride_x(
+ ) const;
+ /*!
+ ensures
+ - returns the horizontal stride used when scanning the max pooling window
+ over an image. That is, each window will be moved stride_x() pixels down
+ at a time when it moves over the image.
+ !*/
+
+ long padding_y(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the top and bottom
+ sides of the image.
+ !*/
+
+ long padding_x(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the left and right
+ sides of the image.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+ };
+
+ template <
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using max_pool = add_layer<max_pool_<nr,nc,stride_y,stride_x>, SUBNET>;
+
+ template <
+ typename SUBNET
+ >
+ using max_pool_everything = add_layer<max_pool_<0,0,1,1>, SUBNET>;
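+
+    // As a worked instance of the output size formulas above, max_pool_<2,2,2,2>
+    // (a 2x2 window with stride 2, hence default padding 0) halves each image
+    // plane: a 64x64 input gives OUT.nr() == 1+(64 + 2*0 - 2)/2 == 32 and likewise
+    // OUT.nc() == 32, with num_samples() and k() unchanged.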
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class avg_pool_
+ {
+ /*!
+ REQUIREMENTS ON TEMPLATE ARGUMENTS
+ - _nr >= 0
+ - _nc >= 0
+ - _stride_y > 0
+ - _stride_x > 0
+ - _padding_y >= 0
+ - _padding_x >= 0
+ - if (_nr != 0) then
+ - _padding_y < _nr
+ - else
+ - _padding_y == 0
+ - if (_nc != 0) then
+                - _padding_x < _nc
+ - else
+ - _padding_x == 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines an average pooling layer that
+ takes an input tensor and downsamples it. It does this by sliding a window
+ over the images in an input tensor and outputting, for each channel, the
+ average element within the window.
+
+ If _nr == 0 then it means the filter size covers all the rows in the input
+ tensor, similarly for the _nc parameter. To be precise, if we call the
+ input tensor IN and the output tensor OUT, then OUT is defined as follows:
+ - let FILT_NR == (nr()==0) ? IN.nr() : nr()
+ - let FILT_NC == (nc()==0) ? IN.nc() : nc()
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == IN.k()
+ - OUT.nr() == 1+(IN.nr() + 2*padding_y() - FILT_NR)/stride_y()
+ - OUT.nc() == 1+(IN.nc() + 2*padding_x() - FILT_NC)/stride_x()
+ - for all valid s, k, r, and c:
+ - image_plane(OUT,s,k)(r,c) == mean(subm_clipped(image_plane(IN,s,k),
+                                                     centered_rect(c*stride_x() + FILT_NC/2 - padding_x(),
+                                                                   r*stride_y() + FILT_NR/2 - padding_y(),
+ FILT_NC,
+ FILT_NR)))
+ !*/
+
+ public:
+
+ avg_pool_ (
+ );
+ /*!
+ ensures
+ - #nr() == _nr
+ - #nc() == _nc
+ - #stride_y() == _stride_y
+ - #stride_x() == _stride_x
+ - #padding_y() == _padding_y
+ - #padding_x() == _padding_x
+ !*/
+
+ long nr(
+ ) const;
+ /*!
+ ensures
+ - returns the number of rows in the pooling window or 0 if the window size
+ is "the entire input tensor".
+ !*/
+
+ long nc(
+ ) const;
+ /*!
+ ensures
+                - returns the number of columns in the pooling window or 0 if the window size
+ is "the entire input tensor".
+ !*/
+
+ long stride_y(
+ ) const;
+ /*!
+ ensures
+ - returns the vertical stride used when scanning the pooling window
+ over an image. That is, each window will be moved stride_y() pixels down
+ at a time when it moves over the image.
+ !*/
+
+ long stride_x(
+ ) const;
+ /*!
+ ensures
+ - returns the horizontal stride used when scanning the pooling window
+ over an image. That is, each window will be moved stride_x() pixels to
+ the right at a time when it moves over the image.
+ !*/
+
+ long padding_y(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the top and bottom
+ sides of the image.
+ !*/
+
+ long padding_x(
+ ) const;
+ /*!
+ ensures
+ - returns the number of pixels of zero padding added to the left and right
+ sides of the image.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+
+ };
+
+ template <
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using avg_pool = add_layer<avg_pool_<nr,nc,stride_y,stride_x>, SUBNET>;
+
+ template <
+ typename SUBNET
+ >
+ using avg_pool_everything = add_layer<avg_pool_<0,0,1,1>, SUBNET>;
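+
+ // A usage sketch (an added example): avg_pool_everything implements global
+ // average pooling, reducing each channel to a single value, which is a
+ // common way to feed convolutional features into a fully connected
+ // classifier head.
+ using example_gap_net = loss_multiclass_log<
+ fc<10,
+ avg_pool_everything<
+ relu<con<32,3,3,1,1,
+ input<matrix<rgb_pixel>>>>>>>;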
+
+// ----------------------------------------------------------------------------------------
+
+ class relu_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a rectified linear layer.
+ Therefore, it passes its inputs through the function
+ f(x)=max(x,0)
+ where f() is applied pointwise across the input tensor.
+ !*/
+
+ public:
+
+ relu_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using relu = add_layer<relu_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class prelu_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a parametric rectified linear
+ layer. Therefore, it passes its inputs through the function
+ f(x) = x>0 ? x : p*x
+ where f() is applied pointwise across the input tensor and p is a scalar
+ parameter learned by this layer.
+
+
+ This is the layer type introduced in the paper:
+ He, Kaiming, et al. "Delving deep into rectifiers: Surpassing
+ human-level performance on imagenet classification." Proceedings of the
+ IEEE International Conference on Computer Vision. 2015.
+ !*/
+
+ public:
+
+ explicit prelu_(
+ float initial_param_value = 0.25
+ );
+ /*!
+ ensures
+ - The p parameter will be initialized with initial_param_value.
+ - #get_initial_param_value() == initial_param_value.
+ !*/
+
+ float get_initial_param_value (
+ ) const;
+ /*!
+ ensures
+ - returns the initial value of the prelu parameter.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using prelu = add_layer<prelu_, SUBNET>;
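+
+ // Sketch (an added example): the parameter's starting value is set at
+ // construction time, so a layer that begins as a "leakier" rectifier than
+ // the 0.25 default could be created like this:
+ prelu_ leaky(0.1f); // leaky.get_initial_param_value() == 0.1f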
+
+// ----------------------------------------------------------------------------------------
+
+ class sig_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a sigmoid layer. Therefore, it
+ passes its inputs through the function
+ f(x)=1/(1+exp(-x))
+ where f() is applied pointwise across the input tensor.
+ !*/
+
+ public:
+
+ sig_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using sig = add_layer<sig_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class htan_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a hyperbolic tangent layer.
+ Therefore, it passes its inputs through the function
+ f(x)=std::tanh(x)
+ where f() is applied pointwise across the input tensor.
+ !*/
+
+ public:
+
+ htan_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using htan = add_layer<htan_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class softmax_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a softmax layer. To be precise,
+ we define the softmax function s(x) as:
+ s(x) == exp(x)/sum(exp(x))
+ where x is a vector. Then this layer treats its input tensor as a
+ collection of multi-channel images and applies s() to each spatial location
+ in each image. In each application, the tensor::k() channel elements at
+ each position are input to s() and then replaced by the outputs of s().
+
+ This means that, for example, if you collapsed each output image to a 1
+ channel image by adding the channels then you would end up with images
+ where each pixel value was 1. This is because the sum of the outputs of
+ s() will always be equal to 1.
+ !*/
+
+ public:
+
+ softmax_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using softmax = add_layer<softmax_, SUBNET>;
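+
+ // A worked example of s() at one spatial location with k() == 3 (values
+ // approximate): for channel inputs x = (1, 2, 3) we get
+ // exp(x) = (2.72, 7.39, 20.09) with sum 30.19, so the outputs are
+ // s(x) = (0.09, 0.24, 0.67), which sum to 1 as described above.
+ //
+ // A common usage sketch, in the spirit of dlib's dnn_imagenet_ex.cpp
+ // (anet_type and net are assumed to be the type and instance of an already
+ // trained network): wrapping the trained subnet in softmax turns raw class
+ // scores into probabilities at inference time.
+ // softmax<anet_type::subnet_type> snet;
+ // snet.subnet() = net.subnet();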
+
+// ----------------------------------------------------------------------------------------
+
+ class softmax_all_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, it defines a softmax layer. To be precise,
+ we define the softmax function s(x) as:
+ s(x) == exp(x)/sum(exp(x))
+ where x is a vector. Then this layer treats its input tensor as a
+ collection of tensor::num_samples() vectors and applies s() to each vector
+ in the tensor. Therefore, there are logically tensor::num_samples()
+ invocations of s().
+ !*/
+
+ public:
+
+ softmax_all_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_
+ interface. Note that this layer doesn't have any parameters, so the tensor
+ returned by get_layer_params() is always empty.
+ !*/
+ };
+
+ template <typename SUBNET>
+ using softmax_all = add_layer<softmax_all_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ template<typename> class tag
+ >
+ class add_prev_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. This layer simply adds the output of two previous layers.
+ In particular, it adds the tensor from its immediate predecessor layer,
+ sub.get_output(), with the tensor from a deeper layer,
+ layer<tag>(sub).get_output().
+
+ Therefore, you supply a tag via add_prev_'s template argument that tells it
+ what layer to add to the output of the previous layer. The result of this
+ addition is output by add_prev_. Finally, the addition happens pointwise
+ according to 4D tensor arithmetic. If the dimensions don't match then
+ missing elements are presumed to be equal to 0. Moreover, each dimension
+ of the output tensor is equal to the maximum dimension of either of the
+ inputs. That is, if the tensors A and B are being added to produce C then:
+ - C.num_samples() == max(A.num_samples(), B.num_samples())
+ - C.k() == max(A.k(), B.k())
+ - C.nr() == max(A.nr(), B.nr())
+ - C.nc() == max(A.nc(), B.nc())
+ !*/
+
+ public:
+ add_prev_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+
+ template <
+ template<typename> class tag,
+ typename SUBNET
+ >
+ using add_prev = add_layer<add_prev_<tag>, SUBNET>;
+
+ // Here we add some convenient aliases for using add_prev_ with the tag layers.
+ template <typename SUBNET> using add_prev1 = add_prev<tag1, SUBNET>;
+ template <typename SUBNET> using add_prev2 = add_prev<tag2, SUBNET>;
+ template <typename SUBNET> using add_prev3 = add_prev<tag3, SUBNET>;
+ template <typename SUBNET> using add_prev4 = add_prev<tag4, SUBNET>;
+ template <typename SUBNET> using add_prev5 = add_prev<tag5, SUBNET>;
+ template <typename SUBNET> using add_prev6 = add_prev<tag6, SUBNET>;
+ template <typename SUBNET> using add_prev7 = add_prev<tag7, SUBNET>;
+ template <typename SUBNET> using add_prev8 = add_prev<tag8, SUBNET>;
+ template <typename SUBNET> using add_prev9 = add_prev<tag9, SUBNET>;
+ template <typename SUBNET> using add_prev10 = add_prev<tag10, SUBNET>;
+ using add_prev1_ = add_prev_<tag1>;
+ using add_prev2_ = add_prev_<tag2>;
+ using add_prev3_ = add_prev_<tag3>;
+ using add_prev4_ = add_prev_<tag4>;
+ using add_prev5_ = add_prev_<tag5>;
+ using add_prev6_ = add_prev_<tag6>;
+ using add_prev7_ = add_prev_<tag7>;
+ using add_prev8_ = add_prev_<tag8>;
+ using add_prev9_ = add_prev_<tag9>;
+ using add_prev10_ = add_prev_<tag10>;
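+
+ // Sketch of the classic use of add_prev (an added example in the style of
+ // dlib's ResNet sample code; the filter count of 8 is arbitrary): a
+ // residual block where tag1 remembers the block's input and add_prev1 adds
+ // it back to the output of two 3x3 convolutions. With stride 1 the default
+ // padding preserves the spatial dimensions, so the addition is exact when
+ // the input also has 8 channels.
+ template <typename SUBNET>
+ using residual_block = relu<add_prev1<con<8,3,3,1,1,
+ relu<con<8,3,3,1,1,
+ tag1<SUBNET>>>>>>;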
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ template<typename> class tag
+ >
+ class mult_prev_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. This layer simply multiplies the output of two previous
+ layers. In particular, it multiplies the tensor from its immediate
+ predecessor layer, sub.get_output(), with the tensor from a deeper layer,
+ layer<tag>(sub).get_output().
+
+ Therefore, you supply a tag via mult_prev_'s template argument that tells
+ it what layer to multiply with the output of the previous layer. The
+ result of this multiplication is output by mult_prev_. Finally, the
+ multiplication happens pointwise according to 4D tensor arithmetic. If the
+ dimensions don't match then missing elements are presumed to be equal to 0.
+ Moreover, each dimension of the output tensor is equal to the maximum
+ dimension of either of the inputs. That is, if the tensors A and B are
+ being multiplied to produce C then:
+ - C.num_samples() == max(A.num_samples(), B.num_samples())
+ - C.k() == max(A.k(), B.k())
+ - C.nr() == max(A.nr(), B.nr())
+ - C.nc() == max(A.nc(), B.nc())
+ !*/
+
+ public:
+ mult_prev_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+
+ template <
+ template<typename> class tag,
+ typename SUBNET
+ >
+ using mult_prev = add_layer<mult_prev_<tag>, SUBNET>;
+
+ // Here we add some convenient aliases for using mult_prev_ with the tag layers.
+ template <typename SUBNET> using mult_prev1 = mult_prev<tag1, SUBNET>;
+ template <typename SUBNET> using mult_prev2 = mult_prev<tag2, SUBNET>;
+ template <typename SUBNET> using mult_prev3 = mult_prev<tag3, SUBNET>;
+ template <typename SUBNET> using mult_prev4 = mult_prev<tag4, SUBNET>;
+ template <typename SUBNET> using mult_prev5 = mult_prev<tag5, SUBNET>;
+ template <typename SUBNET> using mult_prev6 = mult_prev<tag6, SUBNET>;
+ template <typename SUBNET> using mult_prev7 = mult_prev<tag7, SUBNET>;
+ template <typename SUBNET> using mult_prev8 = mult_prev<tag8, SUBNET>;
+ template <typename SUBNET> using mult_prev9 = mult_prev<tag9, SUBNET>;
+ template <typename SUBNET> using mult_prev10 = mult_prev<tag10, SUBNET>;
+ using mult_prev1_ = mult_prev_<tag1>;
+ using mult_prev2_ = mult_prev_<tag2>;
+ using mult_prev3_ = mult_prev_<tag3>;
+ using mult_prev4_ = mult_prev_<tag4>;
+ using mult_prev5_ = mult_prev_<tag5>;
+ using mult_prev6_ = mult_prev_<tag6>;
+ using mult_prev7_ = mult_prev_<tag7>;
+ using mult_prev8_ = mult_prev_<tag8>;
+ using mult_prev9_ = mult_prev_<tag9>;
+ using mult_prev10_ = mult_prev_<tag10>;
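+
+ // Sketch (an added example): mult_prev is a natural way to build pointwise
+ // gating. Here the sig branch computes a mask in [0,1] from the input
+ // remembered by tag1 and mult_prev1 multiplies that mask with the tagged
+ // input. The convolution's 8 filters are assumed to match the input's
+ // channel count so the two tensors have identical dimensions.
+ template <typename SUBNET>
+ using gated = mult_prev1<sig<con<8,3,3,1,1,
+ tag1<SUBNET>>>>;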
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ template<typename> class tag
+ >
+ class scale_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. This layer scales the output channels of the tagged layer
+ by multiplying them with the output of the previous layer. To be specific:
+ - Let INPUT == layer<tag>(sub).get_output()
+ - Let SCALES == sub.get_output()
+ - This layer takes INPUT and SCALES as input.
+ - The output of this layer has the same dimensions as INPUT.
+ - This layer requires:
+ - SCALES.num_samples() == INPUT.num_samples()
+ - SCALES.k() == INPUT.k()
+ - SCALES.nr() == 1
+ - SCALES.nc() == 1
+ - The output tensor is produced by pointwise multiplying SCALES with
+ INPUT at each spatial location. Therefore, if OUT is the output of
+ this layer then we would have:
+ OUT(n,k,r,c) == INPUT(n,k,r,c)*SCALES(n,k)
+ !*/
+
+ public:
+ scale_(
+ );
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+
+ template <
+ template<typename> class tag,
+ typename SUBNET
+ >
+ using scale = add_layer<scale_<tag>, SUBNET>;
+
+ // Here we add some convenient aliases for using scale_ with the tag layers.
+ template <typename SUBNET> using scale1 = scale<tag1, SUBNET>;
+ template <typename SUBNET> using scale2 = scale<tag2, SUBNET>;
+ template <typename SUBNET> using scale3 = scale<tag3, SUBNET>;
+ template <typename SUBNET> using scale4 = scale<tag4, SUBNET>;
+ template <typename SUBNET> using scale5 = scale<tag5, SUBNET>;
+ template <typename SUBNET> using scale6 = scale<tag6, SUBNET>;
+ template <typename SUBNET> using scale7 = scale<tag7, SUBNET>;
+ template <typename SUBNET> using scale8 = scale<tag8, SUBNET>;
+ template <typename SUBNET> using scale9 = scale<tag9, SUBNET>;
+ template <typename SUBNET> using scale10 = scale<tag10, SUBNET>;
+ using scale1_ = scale_<tag1>;
+ using scale2_ = scale_<tag2>;
+ using scale3_ = scale_<tag3>;
+ using scale4_ = scale_<tag4>;
+ using scale5_ = scale_<tag5>;
+ using scale6_ = scale_<tag6>;
+ using scale7_ = scale_<tag7>;
+ using scale8_ = scale_<tag8>;
+ using scale9_ = scale_<tag9>;
+ using scale10_ = scale_<tag10>;
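+
+ // Sketch of the typical use of scale (an added example; the channel counts
+ // assume the tagged feature map has 16 channels): a squeeze-and-excitation
+ // style block. avg_pool_everything squeezes each channel to one value, two
+ // fc layers compute per-channel scales (fc outputs have nr() == nc() == 1,
+ // satisfying the SCALES requirements above), and scale1 multiplies them
+ // back into the feature map remembered by tag1.
+ template <typename SUBNET>
+ using se_block = scale1<sig<fc<16,
+ relu<fc<4,
+ avg_pool_everything<
+ tag1<SUBNET>>>>>>>;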
+
+// ----------------------------------------------------------------------------------------
+
+ template<
+ template<typename> class... TAG_TYPES
+ >
+ class concat_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. This layer simply concatenates the output of tagged layers.
+ Importantly, each input layer must have the same dimensions (i.e.
+ num_samples, nr, and nc) except for the k channel, which may vary. This is
+ because the concatenation happens along the k dimension. That is, the
+ output of this network is a tensor, OUT, that is the concatenation of the
+ tensors:
+ for each (tag in TAG_TYPES)
+ layer<tag>(subnet).get_output()
+ Therefore, OUT.num_samples(), OUT.nr(), and OUT.nc() match the dimensions
+ of the input tensors while OUT.k() is the sum of the input layers' k()
+ dimensions.
+ !*/
+
+ public:
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ dpoint map_input_to_output(dpoint p) const;
+ dpoint map_output_to_input(dpoint p) const;
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+
+ // concat layer definitions
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ typename SUBNET>
+ using concat2 = add_layer<concat_<TAG1, TAG2>, SUBNET>;
+
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ template<typename> class TAG3,
+ typename SUBNET>
+ using concat3 = add_layer<concat_<TAG1, TAG2, TAG3>, SUBNET>;
+
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ template<typename> class TAG3,
+ template<typename> class TAG4,
+ typename SUBNET>
+ using concat4 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4>, SUBNET>;
+
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ template<typename> class TAG3,
+ template<typename> class TAG4,
+ template<typename> class TAG5,
+ typename SUBNET>
+ using concat5 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4, TAG5>, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ /*!A inception layer definitions !*/
+
+ // Now define inception layer tag types. These layer aliases allow creating
+ // the networks described in the paper:
+ // Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of
+ // the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
+ // See the dnn_inception_ex.cpp example program for a complete demonstration of
+ // their use. Note also that we use tag ID numbers >= 1000 to avoid conflicts
+ // with users' own tag layers.
+ template <typename SUBNET> using itag0 = add_tag_layer< 1000 + 0, SUBNET>;
+ template <typename SUBNET> using itag1 = add_tag_layer< 1000 + 1, SUBNET>;
+ template <typename SUBNET> using itag2 = add_tag_layer< 1000 + 2, SUBNET>;
+ template <typename SUBNET> using itag3 = add_tag_layer< 1000 + 3, SUBNET>;
+ template <typename SUBNET> using itag4 = add_tag_layer< 1000 + 4, SUBNET>;
+ template <typename SUBNET> using itag5 = add_tag_layer< 1000 + 5, SUBNET>;
+ // skip to inception input
+ template <typename SUBNET> using iskip = add_skip_layer< itag0, SUBNET>;
+
+ // here are some templates to be used for creating inception layer groups
+ template <template<typename>class B1,
+ template<typename>class B2,
+ typename SUBNET>
+ using inception2 = concat2<itag1, itag2, itag1<B1<iskip< itag2<B2< itag0<SUBNET>>>>>>>;
+
+ template <template<typename>class B1,
+ template<typename>class B2,
+ template<typename>class B3,
+ typename SUBNET>
+ using inception3 = concat3<itag1, itag2, itag3, itag1<B1<iskip< itag2<B2<iskip< itag3<B3< itag0<SUBNET>>>>>>>>>>;
+
+ template <template<typename>class B1,
+ template<typename>class B2,
+ template<typename>class B3,
+ template<typename>class B4,
+ typename SUBNET>
+ using inception4 = concat4<itag1, itag2, itag3, itag4,
+ itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4< itag0<SUBNET>>>>>>>>>>>>>;
+
+ template <template<typename>class B1,
+ template<typename>class B2,
+ template<typename>class B3,
+ template<typename>class B4,
+ template<typename>class B5,
+ typename SUBNET>
+ using inception5 = concat5<itag1, itag2, itag3, itag4, itag5,
+ itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4<iskip< itag5<B5< itag0<SUBNET>>>>>>>>>>>>>>>>;
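+
+ // A concrete sketch (an added example in the spirit of dnn_inception_ex.cpp;
+ // the filter counts are arbitrary): an inception block with a 1x1 branch
+ // and a 3x3 branch. Both branches preserve nr and nc, so their outputs can
+ // be concatenated along the k dimension, giving 20 channels here.
+ template <typename SUBNET> using branch_a = relu<con<10,1,1,1,1,SUBNET>>;
+ template <typename SUBNET> using branch_b = relu<con<10,3,3,1,1,SUBNET>>;
+ template <typename SUBNET> using my_inception = inception2<branch_a,branch_b,SUBNET>;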
+
+// ----------------------------------------------------------------------------------------
+
+ const double DEFAULT_L2_NORM_EPS = 1e-5;
+
+ class l2normalize_
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. It takes tensors as input and L2 normalizes them. In particular,
+ it has the following properties:
+ - The output tensors from this layer have the same dimensions as the
+ input tensors.
+ - If you think of each input tensor as a set of tensor::num_samples()
+ vectors, then the output tensor contains the same vectors except they
+ have been length normalized so that their L2 norms are all 1. I.e.
+ for each vector v we will have ||v||==1.
+ !*/
+
+ public:
+
+ explicit l2normalize_(
+ double eps = DEFAULT_L2_NORM_EPS
+ );
+ /*!
+ requires
+ - eps > 0
+ ensures
+ - #get_eps() == eps
+ !*/
+
+ double get_eps(
+ ) const;
+ /*!
+ ensures
+ - When we normalize a vector we divide it by its L2 norm. However, the
+ get_eps() value is added to the squared norm prior to division to avoid
+ ever dividing by zero.
+ !*/
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ void forward_inplace(const tensor& input, tensor& output);
+ void backward_inplace(const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
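+
+ // Like the other layers in this file, l2normalize_ is used through an
+ // add_layer alias, which would look like the following (a sketch mirroring
+ // the pattern used throughout this file):
+ template <typename SUBNET>
+ using l2normalize = add_layer<l2normalize_, SUBNET>;
+
+ // For instance, an embedding head whose 128-dimensional outputs all lie on
+ // the unit sphere (an added example):
+ // template <typename SUBNET> using embedding = l2normalize<fc<128,SUBNET>>;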
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _offset,
+ long _k,
+ long _nr,
+ long _nc
+ >
+ class extract_
+ {
+ /*!
+ REQUIREMENTS ON TEMPLATE ARGUMENTS
+ - 0 <= _offset
+ - 0 < _k
+ - 0 < _nr
+ - 0 < _nc
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
+ defined above. In particular, the output of this layer is simply a copy of
+ the input tensor. However, you can configure the extract layer to output
+ only some subset of the input tensor and also to reshape it. Therefore,
+ the dimensions of the tensor output by this layer are as follows (letting
+ IN be the input tensor and OUT the output tensor):
+ - OUT.num_samples() == IN.num_samples()
+ - OUT.k() == _k
+ - OUT.nr() == _nr
+ - OUT.nc() == _nc
+
+ So the output will always have the same number of samples as the input, but
+ within each sample (the k,nr,nc part) we will copy only a subset of the
+ values. Moreover, the _offset parameter controls which part of each sample
+ we take. To be very precise, we will have:
+ - let IN_SIZE = IN.k()*IN.nr()*IN.nc()
+ - let OUT_SIZE = _k*_nr*_nc
+ - for i in range[0,IN.num_samples()) and j in range[0,OUT_SIZE):
+ - OUT.host()[i*OUT_SIZE+j] == IN.host()[i*IN_SIZE+_offset+j]
+
+
+ Finally, all this means that the input tensor to this layer must have a big
+ enough size to accommodate taking a _k*_nr*_nc slice from each of its
+ samples.
+ !*/
+
+ public:
+
+ template <typename SUBNET> void setup (const SUBNET& sub);
+ template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
+ template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
+ const tensor& get_layer_params() const;
+ tensor& get_layer_params();
+ /*!
+ These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
+ !*/
+ };
+
+ template <
+ long offset,
+ long k,
+ long nr,
+ long nc,
+ typename SUBNET
+ >
+ using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>;
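+
+ // Sketch (an added example): extract can serve as a flatten/reshape layer.
+ // For a subnetwork whose output is (num_samples, 1, 28, 28), this copies
+ // all 28*28 == 784 values of each sample into a (num_samples, 784, 1, 1)
+ // tensor:
+ template <typename SUBNET>
+ using flatten_28x28 = extract<0, 28*28, 1, 1, SUBNET>;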
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DNn_LAYERS_ABSTRACT_H_
+