path: root/ml/dlib/dlib/dnn/layers.h
Diffstat (limited to 'ml/dlib/dlib/dnn/layers.h')
-rw-r--r--   ml/dlib/dlib/dnn/layers.h   3244
1 file changed, 3244 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/dnn/layers.h b/ml/dlib/dlib/dnn/layers.h
new file mode 100644
index 000000000..91436f635
--- /dev/null
+++ b/ml/dlib/dlib/dnn/layers.h
@@ -0,0 +1,3244 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License. See LICENSE.txt for the full license.
+#ifndef DLIB_DNn_LAYERS_H_
+#define DLIB_DNn_LAYERS_H_
+
+#include "layers_abstract.h"
+#include "tensor.h"
+#include "core.h"
+#include <iostream>
+#include <string>
+#include "../rand.h"
+#include "../string.h"
+#include "tensor_tools.h"
+#include "../vectorstream.h"
+#include "utilities.h"
+#include <sstream>
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ struct num_con_outputs
+ {
+ num_con_outputs(unsigned long n) : num_outputs(n) {}
+ unsigned long num_outputs;
+ };
+
+ template <
+ long _num_filters,
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class con_
+ {
+ public:
+
+ static_assert(_num_filters > 0, "The number of filters must be > 0");
+ static_assert(_nr >= 0, "The number of rows in a filter must be >= 0");
+ static_assert(_nc >= 0, "The number of columns in a filter must be >= 0");
+ static_assert(_stride_y > 0, "The filter stride must be > 0");
+ static_assert(_stride_x > 0, "The filter stride must be > 0");
+ static_assert(_nr==0 || (0 <= _padding_y && _padding_y < _nr), "The padding must be smaller than the filter size.");
+ static_assert(_nc==0 || (0 <= _padding_x && _padding_x < _nc), "The padding must be smaller than the filter size.");
+ static_assert(_nr!=0 || 0 == _padding_y, "If _nr==0 then the padding must be set to 0 as well.");
+        static_assert(_nc!=0 || 0 == _padding_x, "If _nc==0 then the padding must be set to 0 as well.");
+
+ con_(
+ num_con_outputs o
+ ) :
+ learning_rate_multiplier(1),
+ weight_decay_multiplier(1),
+ bias_learning_rate_multiplier(1),
+ bias_weight_decay_multiplier(0),
+ num_filters_(o.num_outputs),
+ padding_y_(_padding_y),
+ padding_x_(_padding_x)
+ {
+ DLIB_CASSERT(num_filters_ > 0);
+ }
+
+ con_() : con_(num_con_outputs(_num_filters)) {}
+
+ long num_filters() const { return num_filters_; }
+ long nr() const
+ {
+ if (_nr==0)
+ return filters.nr();
+ else
+ return _nr;
+ }
+ long nc() const
+ {
+ if (_nc==0)
+ return filters.nc();
+ else
+ return _nc;
+ }
+ long stride_y() const { return _stride_y; }
+ long stride_x() const { return _stride_x; }
+ long padding_y() const { return padding_y_; }
+ long padding_x() const { return padding_x_; }
+
+ void set_num_filters(long num)
+ {
+ DLIB_CASSERT(num > 0);
+ if (num != num_filters_)
+ {
+ DLIB_CASSERT(get_layer_params().size() == 0,
+ "You can't change the number of filters in con_ if the parameter tensor has already been allocated.");
+ num_filters_ = num;
+ }
+ }
+
+ double get_learning_rate_multiplier () const { return learning_rate_multiplier; }
+ double get_weight_decay_multiplier () const { return weight_decay_multiplier; }
+ void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
+ void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; }
+
+ double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; }
+ double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; }
+ void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; }
+ void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; }
+
+ inline dpoint map_input_to_output (
+ dpoint p
+ ) const
+ {
+ p.x() = (p.x()+padding_x()-nc()/2)/stride_x();
+ p.y() = (p.y()+padding_y()-nr()/2)/stride_y();
+ return p;
+ }
+
+ inline dpoint map_output_to_input (
+ dpoint p
+ ) const
+ {
+ p.x() = p.x()*stride_x() - padding_x() + nc()/2;
+ p.y() = p.y()*stride_y() - padding_y() + nr()/2;
+ return p;
+ }
+
+ con_ (
+ const con_& item
+ ) :
+ params(item.params),
+ filters(item.filters),
+ biases(item.biases),
+ learning_rate_multiplier(item.learning_rate_multiplier),
+ weight_decay_multiplier(item.weight_decay_multiplier),
+ bias_learning_rate_multiplier(item.bias_learning_rate_multiplier),
+ bias_weight_decay_multiplier(item.bias_weight_decay_multiplier),
+ num_filters_(item.num_filters_),
+ padding_y_(item.padding_y_),
+ padding_x_(item.padding_x_)
+ {
+ // this->conv is non-copyable and basically stateless, so we have to write our
+ // own copy to avoid trying to copy it and getting an error.
+ }
+
+ con_& operator= (
+ const con_& item
+ )
+ {
+ if (this == &item)
+ return *this;
+
+ // this->conv is non-copyable and basically stateless, so we have to write our
+ // own copy to avoid trying to copy it and getting an error.
+ params = item.params;
+ filters = item.filters;
+ biases = item.biases;
+ padding_y_ = item.padding_y_;
+ padding_x_ = item.padding_x_;
+ learning_rate_multiplier = item.learning_rate_multiplier;
+ weight_decay_multiplier = item.weight_decay_multiplier;
+ bias_learning_rate_multiplier = item.bias_learning_rate_multiplier;
+ bias_weight_decay_multiplier = item.bias_weight_decay_multiplier;
+ num_filters_ = item.num_filters_;
+ return *this;
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& sub)
+ {
+ const long filt_nr = _nr!=0 ? _nr : sub.get_output().nr();
+ const long filt_nc = _nc!=0 ? _nc : sub.get_output().nc();
+
+ long num_inputs = filt_nr*filt_nc*sub.get_output().k();
+ long num_outputs = num_filters_;
+ // allocate params for the filters and also for the filter bias values.
+ params.set_size(num_inputs*num_filters_ + num_filters_);
+
+ dlib::rand rnd(std::rand());
+ randomize_parameters(params, num_inputs+num_outputs, rnd);
+
+ filters = alias_tensor(num_filters_, sub.get_output().k(), filt_nr, filt_nc);
+ biases = alias_tensor(1,num_filters_);
+
+ // set the initial bias values to zero
+ biases(params,filters.size()) = 0;
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ conv.setup(sub.get_output(),
+ filters(params,0),
+ _stride_y,
+ _stride_x,
+ padding_y_,
+ padding_x_);
+ conv(false, output,
+ sub.get_output(),
+ filters(params,0));
+
+ tt::add(1,output,1,biases(params,filters.size()));
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
+ {
+ conv.get_gradient_for_data (true, gradient_input, filters(params,0), sub.get_gradient_input());
+            // no point computing the parameter gradients if they won't be used.
+ if (learning_rate_multiplier != 0)
+ {
+ auto filt = filters(params_grad,0);
+ conv.get_gradient_for_filters (false, gradient_input, sub.get_output(), filt);
+ auto b = biases(params_grad, filters.size());
+ tt::assign_conv_bias_gradient(b, gradient_input);
+ }
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const con_& item, std::ostream& out)
+ {
+ serialize("con_4", out);
+ serialize(item.params, out);
+ serialize(item.num_filters_, out);
+ serialize(_nr, out);
+ serialize(_nc, out);
+ serialize(_stride_y, out);
+ serialize(_stride_x, out);
+ serialize(item.padding_y_, out);
+ serialize(item.padding_x_, out);
+ serialize(item.filters, out);
+ serialize(item.biases, out);
+ serialize(item.learning_rate_multiplier, out);
+ serialize(item.weight_decay_multiplier, out);
+ serialize(item.bias_learning_rate_multiplier, out);
+ serialize(item.bias_weight_decay_multiplier, out);
+ }
+
+ friend void deserialize(con_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ long nr;
+ long nc;
+ int stride_y;
+ int stride_x;
+ if (version == "con_4")
+ {
+ deserialize(item.params, in);
+ deserialize(item.num_filters_, in);
+ deserialize(nr, in);
+ deserialize(nc, in);
+ deserialize(stride_y, in);
+ deserialize(stride_x, in);
+ deserialize(item.padding_y_, in);
+ deserialize(item.padding_x_, in);
+ deserialize(item.filters, in);
+ deserialize(item.biases, in);
+ deserialize(item.learning_rate_multiplier, in);
+ deserialize(item.weight_decay_multiplier, in);
+ deserialize(item.bias_learning_rate_multiplier, in);
+ deserialize(item.bias_weight_decay_multiplier, in);
+ if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::con_");
+ if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::con_");
+ if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::con_");
+ if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::con_");
+ if (stride_y != _stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::con_");
+ if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::con_");
+ }
+ else
+ {
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::con_.");
+ }
+ }
+
+
+ friend std::ostream& operator<<(std::ostream& out, const con_& item)
+ {
+ out << "con\t ("
+ << "num_filters="<<item.num_filters_
+ << ", nr="<<item.nr()
+ << ", nc="<<item.nc()
+ << ", stride_y="<<_stride_y
+ << ", stride_x="<<_stride_x
+ << ", padding_y="<<item.padding_y_
+ << ", padding_x="<<item.padding_x_
+ << ")";
+ out << " learning_rate_mult="<<item.learning_rate_multiplier;
+ out << " weight_decay_mult="<<item.weight_decay_multiplier;
+ out << " bias_learning_rate_mult="<<item.bias_learning_rate_multiplier;
+ out << " bias_weight_decay_mult="<<item.bias_weight_decay_multiplier;
+ return out;
+ }
+
+ friend void to_xml(const con_& item, std::ostream& out)
+ {
+ out << "<con"
+ << " num_filters='"<<item.num_filters_<<"'"
+ << " nr='"<<item.nr()<<"'"
+ << " nc='"<<item.nc()<<"'"
+ << " stride_y='"<<_stride_y<<"'"
+ << " stride_x='"<<_stride_x<<"'"
+ << " padding_y='"<<item.padding_y_<<"'"
+ << " padding_x='"<<item.padding_x_<<"'"
+ << " learning_rate_mult='"<<item.learning_rate_multiplier<<"'"
+ << " weight_decay_mult='"<<item.weight_decay_multiplier<<"'"
+ << " bias_learning_rate_mult='"<<item.bias_learning_rate_multiplier<<"'"
+ << " bias_weight_decay_mult='"<<item.bias_weight_decay_multiplier<<"'>\n";
+ out << mat(item.params);
+ out << "</con>";
+ }
+
+ private:
+
+ resizable_tensor params;
+ alias_tensor filters, biases;
+
+ tt::tensor_conv conv;
+ double learning_rate_multiplier;
+ double weight_decay_multiplier;
+ double bias_learning_rate_multiplier;
+ double bias_weight_decay_multiplier;
+ long num_filters_;
+
+ // These are here only because older versions of con (which you might encounter
+ // serialized to disk) used different padding settings.
+ int padding_y_;
+ int padding_x_;
+
+ };
+
+ template <
+ long num_filters,
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using con = add_layer<con_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>;
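+
+    // Illustrative usage sketch (not part of the upstream header): con_ is normally used
+    // through the con alias when composing a network, e.g. a building block that applies
+    // 16 5x5 filters with stride 1 followed by a relu (relu is defined later in this
+    // header):
+    //
+    //   template <typename SUBNET>
+    //   using conv5 = relu<con<16,5,5,1,1, SUBNET>>;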
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _num_filters,
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class cont_
+ {
+ public:
+
+ static_assert(_num_filters > 0, "The number of filters must be > 0");
+ static_assert(_nr > 0, "The number of rows in a filter must be > 0");
+ static_assert(_nc > 0, "The number of columns in a filter must be > 0");
+ static_assert(_stride_y > 0, "The filter stride must be > 0");
+ static_assert(_stride_x > 0, "The filter stride must be > 0");
+ static_assert(0 <= _padding_y && _padding_y < _nr, "The padding must be smaller than the filter size.");
+ static_assert(0 <= _padding_x && _padding_x < _nc, "The padding must be smaller than the filter size.");
+
+ cont_(
+ num_con_outputs o
+ ) :
+ learning_rate_multiplier(1),
+ weight_decay_multiplier(1),
+ bias_learning_rate_multiplier(1),
+ bias_weight_decay_multiplier(0),
+ num_filters_(o.num_outputs),
+ padding_y_(_padding_y),
+ padding_x_(_padding_x)
+ {
+ DLIB_CASSERT(num_filters_ > 0);
+ }
+
+ cont_() : cont_(num_con_outputs(_num_filters)) {}
+
+ long num_filters() const { return num_filters_; }
+ long nr() const { return _nr; }
+ long nc() const { return _nc; }
+ long stride_y() const { return _stride_y; }
+ long stride_x() const { return _stride_x; }
+ long padding_y() const { return padding_y_; }
+ long padding_x() const { return padding_x_; }
+
+ void set_num_filters(long num)
+ {
+ DLIB_CASSERT(num > 0);
+ if (num != num_filters_)
+ {
+ DLIB_CASSERT(get_layer_params().size() == 0,
+ "You can't change the number of filters in cont_ if the parameter tensor has already been allocated.");
+ num_filters_ = num;
+ }
+ }
+
+ double get_learning_rate_multiplier () const { return learning_rate_multiplier; }
+ double get_weight_decay_multiplier () const { return weight_decay_multiplier; }
+ void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
+ void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; }
+
+ double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; }
+ double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; }
+ void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; }
+ void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; }
+
+ inline dpoint map_output_to_input (
+ dpoint p
+ ) const
+ {
+ p.x() = (p.x()+padding_x()-nc()/2)/stride_x();
+ p.y() = (p.y()+padding_y()-nr()/2)/stride_y();
+ return p;
+ }
+
+ inline dpoint map_input_to_output (
+ dpoint p
+ ) const
+ {
+ p.x() = p.x()*stride_x() - padding_x() + nc()/2;
+ p.y() = p.y()*stride_y() - padding_y() + nr()/2;
+ return p;
+ }
+
+ cont_ (
+ const cont_& item
+ ) :
+ params(item.params),
+ filters(item.filters),
+ biases(item.biases),
+ learning_rate_multiplier(item.learning_rate_multiplier),
+ weight_decay_multiplier(item.weight_decay_multiplier),
+ bias_learning_rate_multiplier(item.bias_learning_rate_multiplier),
+ bias_weight_decay_multiplier(item.bias_weight_decay_multiplier),
+ num_filters_(item.num_filters_),
+ padding_y_(item.padding_y_),
+ padding_x_(item.padding_x_)
+ {
+ // this->conv is non-copyable and basically stateless, so we have to write our
+ // own copy to avoid trying to copy it and getting an error.
+ }
+
+ cont_& operator= (
+ const cont_& item
+ )
+ {
+ if (this == &item)
+ return *this;
+
+ // this->conv is non-copyable and basically stateless, so we have to write our
+ // own copy to avoid trying to copy it and getting an error.
+ params = item.params;
+ filters = item.filters;
+ biases = item.biases;
+ padding_y_ = item.padding_y_;
+ padding_x_ = item.padding_x_;
+ learning_rate_multiplier = item.learning_rate_multiplier;
+ weight_decay_multiplier = item.weight_decay_multiplier;
+ bias_learning_rate_multiplier = item.bias_learning_rate_multiplier;
+ bias_weight_decay_multiplier = item.bias_weight_decay_multiplier;
+ num_filters_ = item.num_filters_;
+ return *this;
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& sub)
+ {
+ long num_inputs = _nr*_nc*sub.get_output().k();
+ long num_outputs = num_filters_;
+ // allocate params for the filters and also for the filter bias values.
+ params.set_size(num_inputs*num_filters_ + num_filters_);
+
+ dlib::rand rnd(std::rand());
+ randomize_parameters(params, num_inputs+num_outputs, rnd);
+
+ filters = alias_tensor(sub.get_output().k(), num_filters_, _nr, _nc);
+ biases = alias_tensor(1,num_filters_);
+
+ // set the initial bias values to zero
+ biases(params,filters.size()) = 0;
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ auto filt = filters(params,0);
+ unsigned int gnr = _stride_y * (sub.get_output().nr() - 1) + filt.nr() - 2 * padding_y_;
+ unsigned int gnc = _stride_x * (sub.get_output().nc() - 1) + filt.nc() - 2 * padding_x_;
+ unsigned int gnsamps = sub.get_output().num_samples();
+ unsigned int gk = filt.k();
+ output.set_size(gnsamps,gk,gnr,gnc);
+ conv.setup(output,filt,_stride_y,_stride_x,padding_y_,padding_x_);
+ conv.get_gradient_for_data(false, sub.get_output(),filt,output);
+ tt::add(1,output,1,biases(params,filters.size()));
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
+ {
+ auto filt = filters(params,0);
+ conv(true, sub.get_gradient_input(),gradient_input, filt);
+ // no point computing the parameter gradients if they won't be used.
+ if (learning_rate_multiplier != 0)
+ {
+ auto filt = filters(params_grad,0);
+ conv.get_gradient_for_filters (false, sub.get_output(),gradient_input, filt);
+ auto b = biases(params_grad, filters.size());
+ tt::assign_conv_bias_gradient(b, gradient_input);
+ }
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const cont_& item, std::ostream& out)
+ {
+ serialize("cont_1", out);
+ serialize(item.params, out);
+ serialize(item.num_filters_, out);
+ serialize(_nr, out);
+ serialize(_nc, out);
+ serialize(_stride_y, out);
+ serialize(_stride_x, out);
+ serialize(item.padding_y_, out);
+ serialize(item.padding_x_, out);
+ serialize(item.filters, out);
+ serialize(item.biases, out);
+ serialize(item.learning_rate_multiplier, out);
+ serialize(item.weight_decay_multiplier, out);
+ serialize(item.bias_learning_rate_multiplier, out);
+ serialize(item.bias_weight_decay_multiplier, out);
+ }
+
+ friend void deserialize(cont_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ long nr;
+ long nc;
+ int stride_y;
+ int stride_x;
+ if (version == "cont_1")
+ {
+ deserialize(item.params, in);
+ deserialize(item.num_filters_, in);
+ deserialize(nr, in);
+ deserialize(nc, in);
+ deserialize(stride_y, in);
+ deserialize(stride_x, in);
+ deserialize(item.padding_y_, in);
+ deserialize(item.padding_x_, in);
+ deserialize(item.filters, in);
+ deserialize(item.biases, in);
+ deserialize(item.learning_rate_multiplier, in);
+ deserialize(item.weight_decay_multiplier, in);
+ deserialize(item.bias_learning_rate_multiplier, in);
+ deserialize(item.bias_weight_decay_multiplier, in);
+                if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::cont_");
+                if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::cont_");
+                if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::cont_");
+                if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::cont_");
+                if (stride_y != _stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::cont_");
+                if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::cont_");
+ }
+ else
+ {
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::con_.");
+ }
+ }
+
+
+ friend std::ostream& operator<<(std::ostream& out, const cont_& item)
+ {
+ out << "cont\t ("
+ << "num_filters="<<item.num_filters_
+ << ", nr="<<_nr
+ << ", nc="<<_nc
+ << ", stride_y="<<_stride_y
+ << ", stride_x="<<_stride_x
+ << ", padding_y="<<item.padding_y_
+ << ", padding_x="<<item.padding_x_
+ << ")";
+ out << " learning_rate_mult="<<item.learning_rate_multiplier;
+ out << " weight_decay_mult="<<item.weight_decay_multiplier;
+ out << " bias_learning_rate_mult="<<item.bias_learning_rate_multiplier;
+ out << " bias_weight_decay_mult="<<item.bias_weight_decay_multiplier;
+ return out;
+ }
+
+ friend void to_xml(const cont_& item, std::ostream& out)
+ {
+ out << "<cont"
+ << " num_filters='"<<item.num_filters_<<"'"
+ << " nr='"<<_nr<<"'"
+ << " nc='"<<_nc<<"'"
+ << " stride_y='"<<_stride_y<<"'"
+ << " stride_x='"<<_stride_x<<"'"
+ << " padding_y='"<<item.padding_y_<<"'"
+ << " padding_x='"<<item.padding_x_<<"'"
+ << " learning_rate_mult='"<<item.learning_rate_multiplier<<"'"
+ << " weight_decay_mult='"<<item.weight_decay_multiplier<<"'"
+ << " bias_learning_rate_mult='"<<item.bias_learning_rate_multiplier<<"'"
+ << " bias_weight_decay_mult='"<<item.bias_weight_decay_multiplier<<"'>\n";
+ out << mat(item.params);
+ out << "</cont>";
+ }
+
+ private:
+
+ resizable_tensor params;
+ alias_tensor filters, biases;
+
+ tt::tensor_conv conv;
+ double learning_rate_multiplier;
+ double weight_decay_multiplier;
+ double bias_learning_rate_multiplier;
+ double bias_weight_decay_multiplier;
+ long num_filters_;
+
+ int padding_y_;
+ int padding_x_;
+
+ };
+
+ template <
+ long num_filters,
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using cont = add_layer<cont_<num_filters,nr,nc,stride_y,stride_x>, SUBNET>;
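+
+    // Illustrative sketch (not part of the upstream header): cont is the transposed
+    // ("deconvolution") counterpart of con, so it is typically used to grow the spatial
+    // resolution of its input, e.g. a stride-2 block in a generator/decoder network
+    // (the exact output size follows the formula in forward() above; bn_con and relu
+    // are defined later in this header, and the filter count of 64 is an arbitrary
+    // placeholder):
+    //
+    //   template <typename SUBNET>
+    //   using upconv_block = relu<bn_con<cont<64,4,4,2,2, SUBNET>>>;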
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ int scale_y,
+ int scale_x
+ >
+ class upsample_
+ {
+ public:
+ static_assert(scale_y >= 1, "upsampling scale factor can't be less than 1.");
+ static_assert(scale_x >= 1, "upsampling scale factor can't be less than 1.");
+
+ upsample_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ output.set_size(
+ sub.get_output().num_samples(),
+ sub.get_output().k(),
+ scale_y*sub.get_output().nr(),
+ scale_x*sub.get_output().nc());
+ tt::resize_bilinear(output, sub.get_output());
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ tt::resize_bilinear_gradient(sub.get_gradient_input(), gradient_input);
+ }
+
+ inline dpoint map_input_to_output (dpoint p) const
+ {
+ p.x() = p.x()*scale_x;
+ p.y() = p.y()*scale_y;
+ return p;
+ }
+ inline dpoint map_output_to_input (dpoint p) const
+ {
+ p.x() = p.x()/scale_x;
+ p.y() = p.y()/scale_y;
+ return p;
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const upsample_& , std::ostream& out)
+ {
+ serialize("upsample_", out);
+ serialize(scale_y, out);
+ serialize(scale_x, out);
+ }
+
+ friend void deserialize(upsample_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "upsample_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::upsample_.");
+
+ int _scale_y;
+ int _scale_x;
+ deserialize(_scale_y, in);
+ deserialize(_scale_x, in);
+ if (_scale_y != scale_y || _scale_x != scale_x)
+ throw serialization_error("Wrong scale found while deserializing dlib::upsample_");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const upsample_& )
+ {
+ out << "upsample\t ("
+ << "scale_y="<<scale_y
+ << ", scale_x="<<scale_x
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const upsample_& /*item*/, std::ostream& out)
+ {
+ out << "<upsample"
+ << " scale_y='"<<scale_y<<"'"
+ << " scale_x='"<<scale_x<<"'/>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ };
+
+ template <
+ int scale,
+ typename SUBNET
+ >
+ using upsample = add_layer<upsample_<scale,scale>, SUBNET>;
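+
+    // Illustrative sketch (not part of the upstream header): upsample<2,SUBNET> doubles
+    // the rows and columns of the previous layer's output with bilinear interpolation,
+    // a parameter-free alternative to cont for decoder/segmentation style networks:
+    //
+    //   template <typename SUBNET>
+    //   using up2 = upsample<2, SUBNET>;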
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class max_pool_
+ {
+ static_assert(_nr >= 0, "The number of rows in a filter must be >= 0");
+ static_assert(_nc >= 0, "The number of columns in a filter must be >= 0");
+ static_assert(_stride_y > 0, "The filter stride must be > 0");
+ static_assert(_stride_x > 0, "The filter stride must be > 0");
+ static_assert(0 <= _padding_y && ((_nr==0 && _padding_y == 0) || (_nr!=0 && _padding_y < _nr)),
+ "The padding must be smaller than the filter size, unless the filters size is 0.");
+ static_assert(0 <= _padding_x && ((_nc==0 && _padding_x == 0) || (_nc!=0 && _padding_x < _nc)),
+ "The padding must be smaller than the filter size, unless the filters size is 0.");
+ public:
+
+
+ max_pool_(
+ ) :
+ padding_y_(_padding_y),
+ padding_x_(_padding_x)
+ {}
+
+ long nr() const { return _nr; }
+ long nc() const { return _nc; }
+ long stride_y() const { return _stride_y; }
+ long stride_x() const { return _stride_x; }
+ long padding_y() const { return padding_y_; }
+ long padding_x() const { return padding_x_; }
+
+ inline dpoint map_input_to_output (
+ dpoint p
+ ) const
+ {
+ p.x() = (p.x()+padding_x()-nc()/2)/stride_x();
+ p.y() = (p.y()+padding_y()-nr()/2)/stride_y();
+ return p;
+ }
+
+ inline dpoint map_output_to_input (
+ dpoint p
+ ) const
+ {
+ p.x() = p.x()*stride_x() - padding_x() + nc()/2;
+ p.y() = p.y()*stride_y() - padding_y() + nr()/2;
+ return p;
+ }
+
+ max_pool_ (
+ const max_pool_& item
+ ) :
+ padding_y_(item.padding_y_),
+ padding_x_(item.padding_x_)
+ {
+ // this->mp is non-copyable so we have to write our own copy to avoid trying to
+ // copy it and getting an error.
+ }
+
+ max_pool_& operator= (
+ const max_pool_& item
+ )
+ {
+ if (this == &item)
+ return *this;
+
+ padding_y_ = item.padding_y_;
+ padding_x_ = item.padding_x_;
+
+ // this->mp is non-copyable so we have to write our own copy to avoid trying to
+ // copy it and getting an error.
+ return *this;
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ mp.setup_max_pooling(_nr!=0?_nr:sub.get_output().nr(),
+ _nc!=0?_nc:sub.get_output().nc(),
+ _stride_y, _stride_x, padding_y_, padding_x_);
+
+ mp(output, sub.get_output());
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ mp.setup_max_pooling(_nr!=0?_nr:sub.get_output().nr(),
+ _nc!=0?_nc:sub.get_output().nc(),
+ _stride_y, _stride_x, padding_y_, padding_x_);
+
+ mp.get_gradient(gradient_input, computed_output, sub.get_output(), sub.get_gradient_input());
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const max_pool_& item, std::ostream& out)
+ {
+ serialize("max_pool_2", out);
+ serialize(_nr, out);
+ serialize(_nc, out);
+ serialize(_stride_y, out);
+ serialize(_stride_x, out);
+ serialize(item.padding_y_, out);
+ serialize(item.padding_x_, out);
+ }
+
+ friend void deserialize(max_pool_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ long nr;
+ long nc;
+ int stride_y;
+ int stride_x;
+ if (version == "max_pool_2")
+ {
+ deserialize(nr, in);
+ deserialize(nc, in);
+ deserialize(stride_y, in);
+ deserialize(stride_x, in);
+ deserialize(item.padding_y_, in);
+ deserialize(item.padding_x_, in);
+ }
+ else
+ {
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::max_pool_.");
+ }
+
+ if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::max_pool_");
+ if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::max_pool_");
+ if (_nr != nr) throw serialization_error("Wrong nr found while deserializing dlib::max_pool_");
+ if (_nc != nc) throw serialization_error("Wrong nc found while deserializing dlib::max_pool_");
+ if (_stride_y != stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::max_pool_");
+ if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::max_pool_");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const max_pool_& item)
+ {
+ out << "max_pool ("
+ << "nr="<<_nr
+ << ", nc="<<_nc
+ << ", stride_y="<<_stride_y
+ << ", stride_x="<<_stride_x
+ << ", padding_y="<<item.padding_y_
+ << ", padding_x="<<item.padding_x_
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const max_pool_& item, std::ostream& out)
+ {
+ out << "<max_pool"
+ << " nr='"<<_nr<<"'"
+ << " nc='"<<_nc<<"'"
+ << " stride_y='"<<_stride_y<<"'"
+ << " stride_x='"<<_stride_x<<"'"
+ << " padding_y='"<<item.padding_y_<<"'"
+ << " padding_x='"<<item.padding_x_<<"'"
+ << "/>\n";
+ }
+
+
+ private:
+
+
+ tt::pooling mp;
+ resizable_tensor params;
+
+ int padding_y_;
+ int padding_x_;
+ };
+
+ template <
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using max_pool = add_layer<max_pool_<nr,nc,stride_y,stride_x>, SUBNET>;
+
+ template <
+ typename SUBNET
+ >
+ using max_pool_everything = add_layer<max_pool_<0,0,1,1>, SUBNET>;
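+
+    // Illustrative sketch (not part of the upstream header): max_pool<3,3,2,2,SUBNET>
+    // takes the max over 3x3 windows moved with stride 2, while max_pool_everything
+    // pools each channel down to a single value because a window size of 0 means "use
+    // the full input plane".  A typical downsampling block (relu is defined later in
+    // this header):
+    //
+    //   template <typename SUBNET>
+    //   using downsample = max_pool<3,3,2,2, relu<con<32,3,3,1,1, SUBNET>>>;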
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _nr,
+ long _nc,
+ int _stride_y,
+ int _stride_x,
+ int _padding_y = _stride_y!=1? 0 : _nr/2,
+ int _padding_x = _stride_x!=1? 0 : _nc/2
+ >
+ class avg_pool_
+ {
+ public:
+ static_assert(_nr >= 0, "The number of rows in a filter must be >= 0");
+ static_assert(_nc >= 0, "The number of columns in a filter must be >= 0");
+ static_assert(_stride_y > 0, "The filter stride must be > 0");
+ static_assert(_stride_x > 0, "The filter stride must be > 0");
+ static_assert(0 <= _padding_y && ((_nr==0 && _padding_y == 0) || (_nr!=0 && _padding_y < _nr)),
+ "The padding must be smaller than the filter size, unless the filters size is 0.");
+ static_assert(0 <= _padding_x && ((_nc==0 && _padding_x == 0) || (_nc!=0 && _padding_x < _nc)),
+ "The padding must be smaller than the filter size, unless the filters size is 0.");
+
+ avg_pool_(
+ ) :
+ padding_y_(_padding_y),
+ padding_x_(_padding_x)
+ {}
+
+ long nr() const { return _nr; }
+ long nc() const { return _nc; }
+ long stride_y() const { return _stride_y; }
+ long stride_x() const { return _stride_x; }
+ long padding_y() const { return padding_y_; }
+ long padding_x() const { return padding_x_; }
+
+ inline dpoint map_input_to_output (
+ dpoint p
+ ) const
+ {
+ p.x() = (p.x()+padding_x()-nc()/2)/stride_x();
+ p.y() = (p.y()+padding_y()-nr()/2)/stride_y();
+ return p;
+ }
+
+ inline dpoint map_output_to_input (
+ dpoint p
+ ) const
+ {
+ p.x() = p.x()*stride_x() - padding_x() + nc()/2;
+ p.y() = p.y()*stride_y() - padding_y() + nr()/2;
+ return p;
+ }
+
+ avg_pool_ (
+ const avg_pool_& item
+ ) :
+ padding_y_(item.padding_y_),
+ padding_x_(item.padding_x_)
+ {
+ // this->ap is non-copyable so we have to write our own copy to avoid trying to
+ // copy it and getting an error.
+ }
+
+ avg_pool_& operator= (
+ const avg_pool_& item
+ )
+ {
+ if (this == &item)
+ return *this;
+
+ padding_y_ = item.padding_y_;
+ padding_x_ = item.padding_x_;
+
+ // this->ap is non-copyable so we have to write our own copy to avoid trying to
+ // copy it and getting an error.
+ return *this;
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ ap.setup_avg_pooling(_nr!=0?_nr:sub.get_output().nr(),
+ _nc!=0?_nc:sub.get_output().nc(),
+ _stride_y, _stride_x, padding_y_, padding_x_);
+
+ ap(output, sub.get_output());
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ ap.setup_avg_pooling(_nr!=0?_nr:sub.get_output().nr(),
+ _nc!=0?_nc:sub.get_output().nc(),
+ _stride_y, _stride_x, padding_y_, padding_x_);
+
+ ap.get_gradient(gradient_input, computed_output, sub.get_output(), sub.get_gradient_input());
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const avg_pool_& item, std::ostream& out)
+ {
+ serialize("avg_pool_2", out);
+ serialize(_nr, out);
+ serialize(_nc, out);
+ serialize(_stride_y, out);
+ serialize(_stride_x, out);
+ serialize(item.padding_y_, out);
+ serialize(item.padding_x_, out);
+ }
+
+ friend void deserialize(avg_pool_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+
+ long nr;
+ long nc;
+ int stride_y;
+ int stride_x;
+ if (version == "avg_pool_2")
+ {
+ deserialize(nr, in);
+ deserialize(nc, in);
+ deserialize(stride_y, in);
+ deserialize(stride_x, in);
+ deserialize(item.padding_y_, in);
+ deserialize(item.padding_x_, in);
+ }
+ else
+ {
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::avg_pool_.");
+ }
+
+ if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::avg_pool_");
+ if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::avg_pool_");
+ if (_nr != nr) throw serialization_error("Wrong nr found while deserializing dlib::avg_pool_");
+ if (_nc != nc) throw serialization_error("Wrong nc found while deserializing dlib::avg_pool_");
+ if (_stride_y != stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::avg_pool_");
+ if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::avg_pool_");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const avg_pool_& item)
+ {
+ out << "avg_pool ("
+ << "nr="<<_nr
+ << ", nc="<<_nc
+ << ", stride_y="<<_stride_y
+ << ", stride_x="<<_stride_x
+ << ", padding_y="<<item.padding_y_
+ << ", padding_x="<<item.padding_x_
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const avg_pool_& item, std::ostream& out)
+ {
+ out << "<avg_pool"
+ << " nr='"<<_nr<<"'"
+ << " nc='"<<_nc<<"'"
+ << " stride_y='"<<_stride_y<<"'"
+ << " stride_x='"<<_stride_x<<"'"
+ << " padding_y='"<<item.padding_y_<<"'"
+ << " padding_x='"<<item.padding_x_<<"'"
+ << "/>\n";
+ }
+ private:
+
+ tt::pooling ap;
+ resizable_tensor params;
+
+ int padding_y_;
+ int padding_x_;
+ };
+
+ template <
+ long nr,
+ long nc,
+ int stride_y,
+ int stride_x,
+ typename SUBNET
+ >
+ using avg_pool = add_layer<avg_pool_<nr,nc,stride_y,stride_x>, SUBNET>;
+
+ template <
+ typename SUBNET
+ >
+ using avg_pool_everything = add_layer<avg_pool_<0,0,1,1>, SUBNET>;
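+
+    // Illustrative sketch (not part of the upstream header): avg_pool mirrors max_pool
+    // but averages the window, and avg_pool_everything performs global average pooling
+    // (one value per channel), which is commonly placed right before the final fc layer:
+    //
+    //   template <typename SUBNET>
+    //   using gap_head = fc<10, avg_pool_everything<SUBNET>>;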
+
+// ----------------------------------------------------------------------------------------
+
+ enum layer_mode
+ {
+ CONV_MODE = 0,
+ FC_MODE = 1
+ };
+
+ const double DEFAULT_BATCH_NORM_EPS = 0.0001;
+
+ template <
+ layer_mode mode
+ >
+ class bn_
+ {
+ public:
+ explicit bn_(
+ unsigned long window_size,
+ double eps_ = DEFAULT_BATCH_NORM_EPS
+ ) :
+ num_updates(0),
+ running_stats_window_size(window_size),
+ learning_rate_multiplier(1),
+ weight_decay_multiplier(0),
+ bias_learning_rate_multiplier(1),
+ bias_weight_decay_multiplier(1),
+ eps(eps_)
+ {
+ DLIB_CASSERT(window_size > 0, "The batch normalization running stats window size can't be 0.");
+ }
+
+ bn_() : bn_(100) {}
+
+ layer_mode get_mode() const { return mode; }
+ unsigned long get_running_stats_window_size () const { return running_stats_window_size; }
+ void set_running_stats_window_size (unsigned long new_window_size )
+ {
+ DLIB_CASSERT(new_window_size > 0, "The batch normalization running stats window size can't be 0.");
+ running_stats_window_size = new_window_size;
+ }
+ double get_eps() const { return eps; }
+
+ double get_learning_rate_multiplier () const { return learning_rate_multiplier; }
+ double get_weight_decay_multiplier () const { return weight_decay_multiplier; }
+ void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
+ void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; }
+
+ double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; }
+ double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; }
+ void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; }
+ void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+
+ template <typename SUBNET>
+ void setup (const SUBNET& sub)
+ {
+ if (mode == FC_MODE)
+ {
+ gamma = alias_tensor(1,
+ sub.get_output().k(),
+ sub.get_output().nr(),
+ sub.get_output().nc());
+ }
+ else
+ {
+ gamma = alias_tensor(1, sub.get_output().k());
+ }
+ beta = gamma;
+
+ params.set_size(gamma.size()+beta.size());
+
+ gamma(params,0) = 1;
+ beta(params,gamma.size()) = 0;
+
+ running_means.copy_size(gamma(params,0));
+ running_variances.copy_size(gamma(params,0));
+ running_means = 0;
+ running_variances = 1;
+ num_updates = 0;
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ auto g = gamma(params,0);
+ auto b = beta(params,gamma.size());
+ if (sub.get_output().num_samples() > 1)
+ {
+ const double decay = 1.0 - num_updates/(num_updates+1.0);
+ ++num_updates;
+ if (num_updates > running_stats_window_size)
+ num_updates = running_stats_window_size;
+
+ if (mode == FC_MODE)
+ tt::batch_normalize(eps, output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
+ else
+ tt::batch_normalize_conv(eps, output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b);
+ }
+ else // we are running in testing mode so we just linearly scale the input tensor.
+ {
+ if (mode == FC_MODE)
+ tt::batch_normalize_inference(eps, output, sub.get_output(), g, b, running_means, running_variances);
+ else
+ tt::batch_normalize_conv_inference(eps, output, sub.get_output(), g, b, running_means, running_variances);
+ }
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
+ {
+ auto g = gamma(params,0);
+ auto g_grad = gamma(params_grad, 0);
+ auto b_grad = beta(params_grad, gamma.size());
+ if (mode == FC_MODE)
+ tt::batch_normalize_gradient(eps, gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad );
+ else
+ tt::batch_normalize_conv_gradient(eps, gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad );
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const bn_& item, std::ostream& out)
+ {
+ if (mode == CONV_MODE)
+ serialize("bn_con2", out);
+ else // if FC_MODE
+ serialize("bn_fc2", out);
+ serialize(item.params, out);
+ serialize(item.gamma, out);
+ serialize(item.beta, out);
+ serialize(item.means, out);
+ serialize(item.invstds, out);
+ serialize(item.running_means, out);
+ serialize(item.running_variances, out);
+ serialize(item.num_updates, out);
+ serialize(item.running_stats_window_size, out);
+ serialize(item.learning_rate_multiplier, out);
+ serialize(item.weight_decay_multiplier, out);
+ serialize(item.bias_learning_rate_multiplier, out);
+ serialize(item.bias_weight_decay_multiplier, out);
+ serialize(item.eps, out);
+ }
+
+ friend void deserialize(bn_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (mode == CONV_MODE)
+ {
+ if (version != "bn_con2")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::bn_.");
+ }
+ else // must be in FC_MODE
+ {
+ if (version != "bn_fc2")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::bn_.");
+ }
+
+ deserialize(item.params, in);
+ deserialize(item.gamma, in);
+ deserialize(item.beta, in);
+ deserialize(item.means, in);
+ deserialize(item.invstds, in);
+ deserialize(item.running_means, in);
+ deserialize(item.running_variances, in);
+ deserialize(item.num_updates, in);
+ deserialize(item.running_stats_window_size, in);
+ deserialize(item.learning_rate_multiplier, in);
+ deserialize(item.weight_decay_multiplier, in);
+ deserialize(item.bias_learning_rate_multiplier, in);
+ deserialize(item.bias_weight_decay_multiplier, in);
+ deserialize(item.eps, in);
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const bn_& item)
+ {
+ if (mode == CONV_MODE)
+ out << "bn_con ";
+ else
+ out << "bn_fc ";
+ out << " eps="<<item.eps;
+ out << " running_stats_window_size="<<item.running_stats_window_size;
+ out << " learning_rate_mult="<<item.learning_rate_multiplier;
+ out << " weight_decay_mult="<<item.weight_decay_multiplier;
+ out << " bias_learning_rate_mult="<<item.bias_learning_rate_multiplier;
+ out << " bias_weight_decay_mult="<<item.bias_weight_decay_multiplier;
+ return out;
+ }
+
+ friend void to_xml(const bn_& item, std::ostream& out)
+ {
+ if (mode==CONV_MODE)
+ out << "<bn_con";
+ else
+ out << "<bn_fc";
+
+ out << " eps='"<<item.eps<<"'";
+ out << " running_stats_window_size='"<<item.running_stats_window_size<<"'";
+ out << " learning_rate_mult='"<<item.learning_rate_multiplier<<"'";
+ out << " weight_decay_mult='"<<item.weight_decay_multiplier<<"'";
+ out << " bias_learning_rate_mult='"<<item.bias_learning_rate_multiplier<<"'";
+ out << " bias_weight_decay_mult='"<<item.bias_weight_decay_multiplier<<"'";
+ out << ">\n";
+
+ out << mat(item.params);
+
+ if (mode==CONV_MODE)
+ out << "</bn_con>\n";
+ else
+ out << "</bn_fc>\n";
+ }
+
+ private:
+
+ friend class affine_;
+
+ resizable_tensor params;
+ alias_tensor gamma, beta;
+ resizable_tensor means, running_means;
+ resizable_tensor invstds, running_variances;
+ unsigned long num_updates;
+ unsigned long running_stats_window_size;
+ double learning_rate_multiplier;
+ double weight_decay_multiplier;
+ double bias_learning_rate_multiplier;
+ double bias_weight_decay_multiplier;
+ double eps;
+ };
+
+ template <typename SUBNET>
+ using bn_con = add_layer<bn_<CONV_MODE>, SUBNET>;
+ template <typename SUBNET>
+ using bn_fc = add_layer<bn_<FC_MODE>, SUBNET>;
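+
+    // Illustrative sketch (not part of the upstream header): bn_con normalizes each
+    // channel of a convolutional output and bn_fc normalizes the output of a fully
+    // connected layer.  A common training-time building block is:
+    //
+    //   template <long N, typename SUBNET>
+    //   using conv_block = relu<bn_con<con<N,3,3,1,1, SUBNET>>>;
+    //
+    // For inference a trained bn_ layer is typically replaced by affine_ (defined
+    // further below), which folds the running statistics into a fixed scale and shift.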
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ class visitor_bn_running_stats_window_size
+ {
+ public:
+
+ visitor_bn_running_stats_window_size(unsigned long new_window_size_) : new_window_size(new_window_size_) {}
+
+ template <typename T>
+ void set_window_size(T&) const
+ {
+ // ignore other layer detail types
+ }
+
+ template < layer_mode mode >
+ void set_window_size(bn_<mode>& l) const
+ {
+ l.set_running_stats_window_size(new_window_size);
+ }
+
+ template<typename input_layer_type>
+ void operator()(size_t , input_layer_type& ) const
+ {
+ // ignore other layers
+ }
+
+ template <typename T, typename U, typename E>
+ void operator()(size_t , add_layer<T,U,E>& l) const
+ {
+ set_window_size(l.layer_details());
+ }
+
+ private:
+
+ unsigned long new_window_size;
+ };
+ }
+
+ template <typename net_type>
+ void set_all_bn_running_stats_window_sizes (
+ net_type& net,
+ unsigned long new_window_size
+ )
+ {
+ visit_layers(net, impl::visitor_bn_running_stats_window_size(new_window_size));
+ }
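+
+    // Illustrative usage sketch (not part of the upstream header): the helper above
+    // visits every layer and only touches bn_ layers, so one call updates the whole
+    // network, e.g.
+    //
+    //   net_type net;   // any network type containing bn_con/bn_fc layers
+    //   set_all_bn_running_stats_window_sizes(net, 1000);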
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ enum fc_bias_mode
+ {
+ FC_HAS_BIAS = 0,
+ FC_NO_BIAS = 1
+ };
+
+ struct num_fc_outputs
+ {
+ num_fc_outputs(unsigned long n) : num_outputs(n) {}
+ unsigned long num_outputs;
+ };
+
+ template <
+ unsigned long num_outputs_,
+ fc_bias_mode bias_mode
+ >
+ class fc_
+ {
+ static_assert(num_outputs_ > 0, "The number of outputs from a fc_ layer must be > 0");
+
+ public:
+ fc_(num_fc_outputs o) : num_outputs(o.num_outputs), num_inputs(0),
+ learning_rate_multiplier(1),
+ weight_decay_multiplier(1),
+ bias_learning_rate_multiplier(1),
+ bias_weight_decay_multiplier(0)
+ {}
+
+ fc_() : fc_(num_fc_outputs(num_outputs_)) {}
+
+ double get_learning_rate_multiplier () const { return learning_rate_multiplier; }
+ double get_weight_decay_multiplier () const { return weight_decay_multiplier; }
+ void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; }
+ void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; }
+
+ double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; }
+ double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; }
+ void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; }
+ void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; }
+
+ unsigned long get_num_outputs (
+ ) const { return num_outputs; }
+
+ void set_num_outputs(long num)
+ {
+ DLIB_CASSERT(num > 0);
+ if (num != (long)num_outputs)
+ {
+ DLIB_CASSERT(get_layer_params().size() == 0,
+ "You can't change the number of filters in fc_ if the parameter tensor has already been allocated.");
+ num_outputs = num;
+ }
+ }
+
+ fc_bias_mode get_bias_mode (
+ ) const { return bias_mode; }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& sub)
+ {
+ num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
+ if (bias_mode == FC_HAS_BIAS)
+ params.set_size(num_inputs+1, num_outputs);
+ else
+ params.set_size(num_inputs, num_outputs);
+
+ dlib::rand rnd(std::rand());
+ randomize_parameters(params, num_inputs+num_outputs, rnd);
+
+ weights = alias_tensor(num_inputs, num_outputs);
+
+ if (bias_mode == FC_HAS_BIAS)
+ {
+ biases = alias_tensor(1,num_outputs);
+ // set the initial bias values to zero
+ biases(params,weights.size()) = 0;
+ }
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ DLIB_CASSERT((long)num_inputs == sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k(),
+ "The size of the input tensor to this fc layer doesn't match the size the fc layer was trained with.");
+ output.set_size(sub.get_output().num_samples(), num_outputs);
+
+ auto w = weights(params, 0);
+ tt::gemm(0,output, 1,sub.get_output(),false, w,false);
+ if (bias_mode == FC_HAS_BIAS)
+ {
+ auto b = biases(params, weights.size());
+ tt::add(1,output,1,b);
+ }
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
+ {
+ // no point computing the parameter gradients if they won't be used.
+ if (learning_rate_multiplier != 0)
+ {
+ // compute the gradient of the weight parameters.
+ auto pw = weights(params_grad, 0);
+ tt::gemm(0,pw, 1,sub.get_output(),true, gradient_input,false);
+
+ if (bias_mode == FC_HAS_BIAS)
+ {
+ // compute the gradient of the bias parameters.
+ auto pb = biases(params_grad, weights.size());
+ tt::assign_bias_gradient(pb, gradient_input);
+ }
+ }
+
+ // compute the gradient for the data
+ auto w = weights(params, 0);
+ tt::gemm(1,sub.get_gradient_input(), 1,gradient_input,false, w,true);
+ }
+
+ alias_tensor_instance get_weights()
+ {
+ return weights(params, 0);
+ }
+
+ alias_tensor_const_instance get_weights() const
+ {
+ return weights(params, 0);
+ }
+
+ alias_tensor_instance get_biases()
+ {
+ static_assert(bias_mode == FC_HAS_BIAS, "This fc_ layer doesn't have a bias vector "
+ "to be retrieved, as per template parameter 'bias_mode'.");
+ return biases(params, weights.size());
+ }
+
+ alias_tensor_const_instance get_biases() const
+ {
+ static_assert(bias_mode == FC_HAS_BIAS, "This fc_ layer doesn't have a bias vector "
+ "to be retrieved, as per template parameter 'bias_mode'.");
+ return biases(params, weights.size());
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const fc_& item, std::ostream& out)
+ {
+ serialize("fc_2", out);
+ serialize(item.num_outputs, out);
+ serialize(item.num_inputs, out);
+ serialize(item.params, out);
+ serialize(item.weights, out);
+ serialize(item.biases, out);
+ serialize((int)bias_mode, out);
+ serialize(item.learning_rate_multiplier, out);
+ serialize(item.weight_decay_multiplier, out);
+ serialize(item.bias_learning_rate_multiplier, out);
+ serialize(item.bias_weight_decay_multiplier, out);
+ }
+
+ friend void deserialize(fc_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "fc_2")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::fc_.");
+
+ deserialize(item.num_outputs, in);
+ deserialize(item.num_inputs, in);
+ deserialize(item.params, in);
+ deserialize(item.weights, in);
+ deserialize(item.biases, in);
+ int bmode = 0;
+ deserialize(bmode, in);
+ if (bias_mode != (fc_bias_mode)bmode) throw serialization_error("Wrong fc_bias_mode found while deserializing dlib::fc_");
+ deserialize(item.learning_rate_multiplier, in);
+ deserialize(item.weight_decay_multiplier, in);
+ deserialize(item.bias_learning_rate_multiplier, in);
+ deserialize(item.bias_weight_decay_multiplier, in);
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const fc_& item)
+ {
+ if (bias_mode == FC_HAS_BIAS)
+ {
+ out << "fc\t ("
+ << "num_outputs="<<item.num_outputs
+ << ")";
+ out << " learning_rate_mult="<<item.learning_rate_multiplier;
+ out << " weight_decay_mult="<<item.weight_decay_multiplier;
+ out << " bias_learning_rate_mult="<<item.bias_learning_rate_multiplier;
+ out << " bias_weight_decay_mult="<<item.bias_weight_decay_multiplier;
+ }
+ else
+ {
+ out << "fc_no_bias ("
+ << "num_outputs="<<item.num_outputs
+ << ")";
+ out << " learning_rate_mult="<<item.learning_rate_multiplier;
+ out << " weight_decay_mult="<<item.weight_decay_multiplier;
+ }
+ return out;
+ }
+
+ friend void to_xml(const fc_& item, std::ostream& out)
+ {
+ if (bias_mode==FC_HAS_BIAS)
+ {
+ out << "<fc"
+ << " num_outputs='"<<item.num_outputs<<"'"
+ << " learning_rate_mult='"<<item.learning_rate_multiplier<<"'"
+ << " weight_decay_mult='"<<item.weight_decay_multiplier<<"'"
+ << " bias_learning_rate_mult='"<<item.bias_learning_rate_multiplier<<"'"
+ << " bias_weight_decay_mult='"<<item.bias_weight_decay_multiplier<<"'";
+ out << ">\n";
+ out << mat(item.params);
+ out << "</fc>\n";
+ }
+ else
+ {
+ out << "<fc_no_bias"
+ << " num_outputs='"<<item.num_outputs<<"'"
+ << " learning_rate_mult='"<<item.learning_rate_multiplier<<"'"
+ << " weight_decay_mult='"<<item.weight_decay_multiplier<<"'";
+ out << ">\n";
+ out << mat(item.params);
+ out << "</fc_no_bias>\n";
+ }
+ }
+
+ private:
+
+ unsigned long num_outputs;
+ unsigned long num_inputs;
+ resizable_tensor params;
+ alias_tensor weights, biases;
+ double learning_rate_multiplier;
+ double weight_decay_multiplier;
+ double bias_learning_rate_multiplier;
+ double bias_weight_decay_multiplier;
+ };
+
+ template <
+ unsigned long num_outputs,
+ typename SUBNET
+ >
+ using fc = add_layer<fc_<num_outputs,FC_HAS_BIAS>, SUBNET>;
+
+ template <
+ unsigned long num_outputs,
+ typename SUBNET
+ >
+ using fc_no_bias = add_layer<fc_<num_outputs,FC_NO_BIAS>, SUBNET>;
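+
+    // Illustrative sketch (not part of the upstream header): fc<N,SUBNET> flattens the
+    // previous layer's tensor to num_samples rows and maps it to N outputs, while
+    // fc_no_bias omits the bias row of the parameter matrix.  A 10-way classifier head
+    // might look like this (loss_multiclass_log lives in loss.h and is shown only for
+    // context):
+    //
+    //   template <typename SUBNET>
+    //   using head = loss_multiclass_log<fc<10, avg_pool_everything<SUBNET>>>;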
+
+// ----------------------------------------------------------------------------------------
+
+ class dropout_
+ {
+ public:
+ explicit dropout_(
+ float drop_rate_ = 0.5
+ ) :
+ drop_rate(drop_rate_),
+ rnd(std::rand())
+ {
+ DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1);
+ }
+
+ // We have to add a copy constructor and assignment operator because the rnd object
+ // is non-copyable.
+ dropout_(
+ const dropout_& item
+ ) : drop_rate(item.drop_rate), mask(item.mask), rnd(std::rand())
+ {}
+
+ dropout_& operator= (
+ const dropout_& item
+ )
+ {
+ if (this == &item)
+ return *this;
+
+ drop_rate = item.drop_rate;
+ mask = item.mask;
+ return *this;
+ }
+
+ float get_drop_rate (
+ ) const { return drop_rate; }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ // create a random mask and use it to filter the data
+ mask.copy_size(input);
+ rnd.fill_uniform(mask);
+ tt::threshold(mask, drop_rate);
+ tt::multiply(false, output, input, mask);
+ }
+
+ void backward_inplace(
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor& /*params_grad*/
+ )
+ {
+ if (is_same_object(gradient_input, data_grad))
+ tt::multiply(false, data_grad, mask, gradient_input);
+ else
+ tt::multiply(true, data_grad, mask, gradient_input);
+ }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const dropout_& item, std::ostream& out)
+ {
+ serialize("dropout_", out);
+ serialize(item.drop_rate, out);
+ serialize(item.mask, out);
+ }
+
+ friend void deserialize(dropout_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "dropout_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::dropout_.");
+ deserialize(item.drop_rate, in);
+ deserialize(item.mask, in);
+ }
+
+ void clean(
+ )
+ {
+ mask.clear();
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const dropout_& item)
+ {
+ out << "dropout\t ("
+ << "drop_rate="<<item.drop_rate
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const dropout_& item, std::ostream& out)
+ {
+ out << "<dropout"
+ << " drop_rate='"<<item.drop_rate<<"'";
+ out << "/>\n";
+ }
+
+ private:
+ float drop_rate;
+ resizable_tensor mask;
+
+ tt::tensor_rand rnd;
+ resizable_tensor params; // unused
+ };
+
+
+ template <typename SUBNET>
+ using dropout = add_layer<dropout_, SUBNET>;
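+
+    // Illustrative sketch (not part of the upstream header): dropout zeroes each element
+    // of its input with probability drop_rate (0.5 by default) during training:
+    //
+    //   template <typename SUBNET>
+    //   using dropped = dropout<relu<fc<512, SUBNET>>>;
+    //
+    // For deployment the dropout layers are usually swapped for multiply layers (next
+    // section), which rescale by 1-drop_rate instead of randomly masking.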
+
+// ----------------------------------------------------------------------------------------
+
+ class multiply_
+ {
+ public:
+ explicit multiply_(
+ float val_ = 0.5
+ ) :
+ val(val_)
+ {
+ }
+
+ multiply_ (
+ const dropout_& item
+ ) : val(1-item.get_drop_rate()) {}
+
+ float get_multiply_value (
+ ) const { return val; }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::affine_transform(output, input, val);
+ }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ void backward_inplace(
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor& /*params_grad*/
+ )
+ {
+ if (is_same_object(gradient_input, data_grad))
+ tt::affine_transform(data_grad, gradient_input, val);
+ else
+ tt::affine_transform(data_grad, data_grad, gradient_input, 1, val);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const multiply_& item, std::ostream& out)
+ {
+ serialize("multiply_", out);
+ serialize(item.val, out);
+ }
+
+ friend void deserialize(multiply_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version == "dropout_")
+ {
+ // Since we can build a multiply_ from a dropout_ we check if that's what
+ // is in the stream and if so then just convert it right here.
+ unserialize sin(version, in);
+ dropout_ temp;
+ deserialize(temp, sin);
+ item = temp;
+ return;
+ }
+
+ if (version != "multiply_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::multiply_.");
+ deserialize(item.val, in);
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const multiply_& item)
+ {
+ out << "multiply ("
+ << "val="<<item.val
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const multiply_& item, std::ostream& out)
+ {
+ out << "<multiply"
+ << " val='"<<item.val<<"'";
+ out << "/>\n";
+ }
+ private:
+ float val;
+ resizable_tensor params; // unused
+ };
+
+ template <typename SUBNET>
+ using multiply = add_layer<multiply_, SUBNET>;
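+
+    // Illustrative sketch (not part of the upstream header): because multiply_ can be
+    // constructed from a dropout_ (val = 1-drop_rate) and its deserialize() accepts a
+    // serialized dropout_, a deployment network can declare multiply where the training
+    // network used dropout and still load the trained model:
+    //
+    //   using train_fc = dropout<relu<fc<512, SUBNET_TYPE>>>;   // SUBNET_TYPE: placeholder
+    //   using test_fc  = multiply<relu<fc<512, SUBNET_TYPE>>>;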
+
+// ----------------------------------------------------------------------------------------
+
+ class affine_
+ {
+ public:
+ affine_(
+ ) : mode(FC_MODE)
+ {
+ }
+
+ affine_(
+ layer_mode mode_
+ ) : mode(mode_)
+ {
+ }
+
+ template <
+ layer_mode bnmode
+ >
+ affine_(
+ const bn_<bnmode>& item
+ )
+ {
+ gamma = item.gamma;
+ beta = item.beta;
+ mode = bnmode;
+
+ params.copy_size(item.params);
+
+ auto g = gamma(params,0);
+ auto b = beta(params,gamma.size());
+
+ resizable_tensor temp(item.params);
+ auto sg = gamma(temp,0);
+ auto sb = beta(temp,gamma.size());
+
+ g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances)+item.get_eps()));
+ b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means));
+ }
+
+ layer_mode get_mode() const { return mode; }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& sub)
+ {
+ if (mode == FC_MODE)
+ {
+ gamma = alias_tensor(1,
+ sub.get_output().k(),
+ sub.get_output().nr(),
+ sub.get_output().nc());
+ }
+ else
+ {
+ gamma = alias_tensor(1, sub.get_output().k());
+ }
+ beta = gamma;
+
+ params.set_size(gamma.size()+beta.size());
+
+ gamma(params,0) = 1;
+ beta(params,gamma.size()) = 0;
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ auto g = gamma(params,0);
+ auto b = beta(params,gamma.size());
+ if (mode == FC_MODE)
+ tt::affine_transform(output, input, g, b);
+ else
+ tt::affine_transform_conv(output, input, g, b);
+ }
+
+ void backward_inplace(
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor& /*params_grad*/
+ )
+ {
+ auto g = gamma(params,0);
+ auto b = beta(params,gamma.size());
+
+ // We are computing the gradient of dot(gradient_input, computed_output*g + b)
+ if (mode == FC_MODE)
+ {
+ if (is_same_object(gradient_input, data_grad))
+ tt::multiply(false, data_grad, gradient_input, g);
+ else
+ tt::multiply(true, data_grad, gradient_input, g);
+ }
+ else
+ {
+ if (is_same_object(gradient_input, data_grad))
+ tt::multiply_conv(false, data_grad, gradient_input, g);
+ else
+ tt::multiply_conv(true, data_grad, gradient_input, g);
+ }
+ }
+
+ const tensor& get_layer_params() const { return empty_params; }
+ tensor& get_layer_params() { return empty_params; }
+
+ friend void serialize(const affine_& item, std::ostream& out)
+ {
+ serialize("affine_", out);
+ serialize(item.params, out);
+ serialize(item.gamma, out);
+ serialize(item.beta, out);
+ serialize((int)item.mode, out);
+ }
+
+ friend void deserialize(affine_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version == "bn_con2")
+ {
+ // Since we can build an affine_ from a bn_ we check if that's what is in
+ // the stream and if so then just convert it right here.
+ unserialize sin(version, in);
+ bn_<CONV_MODE> temp;
+ deserialize(temp, sin);
+ item = temp;
+ return;
+ }
+ else if (version == "bn_fc2")
+ {
+ // Since we can build an affine_ from a bn_ we check if that's what is in
+ // the stream and if so then just convert it right here.
+ unserialize sin(version, in);
+ bn_<FC_MODE> temp;
+ deserialize(temp, sin);
+ item = temp;
+ return;
+ }
+
+ if (version != "affine_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::affine_.");
+ deserialize(item.params, in);
+ deserialize(item.gamma, in);
+ deserialize(item.beta, in);
+ int mode;
+ deserialize(mode, in);
+ item.mode = (layer_mode)mode;
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const affine_& )
+ {
+ out << "affine";
+ return out;
+ }
+
+ friend void to_xml(const affine_& item, std::ostream& out)
+ {
+ if (item.mode==CONV_MODE)
+ out << "<affine_con>\n";
+ else
+ out << "<affine_fc>\n";
+
+ out << mat(item.params);
+
+ if (item.mode==CONV_MODE)
+ out << "</affine_con>\n";
+ else
+ out << "</affine_fc>\n";
+ }
+
+ private:
+ resizable_tensor params, empty_params;
+ alias_tensor gamma, beta;
+ layer_mode mode;
+ };
+
+ template <typename SUBNET>
+ using affine = add_layer<affine_, SUBNET>;
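+
+ // A hedged usage sketch: affine_ is constructible from bn_ and its deserialize routine
+ // accepts bn_con/bn_fc records (see above), so a network trained with batch
+ // normalization can be loaded into an otherwise identical network type that uses
+ // affine layers, folding the learned statistics into a fixed scale and shift.  The
+ // layer choices below are illustrative only:
+ //    using train_net = loss_multiclass_log<fc<10,relu<bn_con<con<16,5,5,1,1,input<matrix<float>>>>>>>;
+ //    using infer_net = loss_multiclass_log<fc<10,relu<affine<con<16,5,5,1,1,input<matrix<float>>>>>>>;
+ //    infer_net net;
+ //    deserialize("trained.dat") >> net;  // bn_ records are converted to affine_ on load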
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ template<typename> class tag
+ >
+ class add_prev_
+ {
+ public:
+ const static unsigned long id = tag_id<tag>::id;
+
+ add_prev_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ auto&& t1 = sub.get_output();
+ auto&& t2 = layer<tag>(sub).get_output();
+ output.set_size(std::max(t1.num_samples(),t2.num_samples()),
+ std::max(t1.k(),t2.k()),
+ std::max(t1.nr(),t2.nr()),
+ std::max(t1.nc(),t2.nc()));
+ tt::add(output, t1, t2);
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ // The gradient just flows backwards to the two layers that forward() added
+ // together.
+ tt::add(sub.get_gradient_input(), sub.get_gradient_input(), gradient_input);
+ tt::add(layer<tag>(sub).get_gradient_input(), layer<tag>(sub).get_gradient_input(), gradient_input);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ friend void serialize(const add_prev_& , std::ostream& out)
+ {
+ serialize("add_prev_", out);
+ }
+
+ friend void deserialize(add_prev_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "add_prev_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::add_prev_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const add_prev_& item)
+ {
+ out << "add_prev"<<id;
+ return out;
+ }
+
+ friend void to_xml(const add_prev_& item, std::ostream& out)
+ {
+ out << "<add_prev tag='"<<id<<"'/>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ };
+
+ template <
+ template<typename> class tag,
+ typename SUBNET
+ >
+ using add_prev = add_layer<add_prev_<tag>, SUBNET>;
+
+ template <typename SUBNET> using add_prev1 = add_prev<tag1, SUBNET>;
+ template <typename SUBNET> using add_prev2 = add_prev<tag2, SUBNET>;
+ template <typename SUBNET> using add_prev3 = add_prev<tag3, SUBNET>;
+ template <typename SUBNET> using add_prev4 = add_prev<tag4, SUBNET>;
+ template <typename SUBNET> using add_prev5 = add_prev<tag5, SUBNET>;
+ template <typename SUBNET> using add_prev6 = add_prev<tag6, SUBNET>;
+ template <typename SUBNET> using add_prev7 = add_prev<tag7, SUBNET>;
+ template <typename SUBNET> using add_prev8 = add_prev<tag8, SUBNET>;
+ template <typename SUBNET> using add_prev9 = add_prev<tag9, SUBNET>;
+ template <typename SUBNET> using add_prev10 = add_prev<tag10, SUBNET>;
+
+ using add_prev1_ = add_prev_<tag1>;
+ using add_prev2_ = add_prev_<tag2>;
+ using add_prev3_ = add_prev_<tag3>;
+ using add_prev4_ = add_prev_<tag4>;
+ using add_prev5_ = add_prev_<tag5>;
+ using add_prev6_ = add_prev_<tag6>;
+ using add_prev7_ = add_prev_<tag7>;
+ using add_prev8_ = add_prev_<tag8>;
+ using add_prev9_ = add_prev_<tag9>;
+ using add_prev10_ = add_prev_<tag10>;
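+
+ // A minimal residual-block sketch (filter counts and layer choices are illustrative
+ // only): tag the input, run it through a couple of convolutions, then add the tagged
+ // tensor back in with add_prev1.  For a true residual connection the two tensors
+ // should have matching dimensions.
+ //    template <typename SUBNET>
+ //    using res_block = relu<add_prev1<con<8,3,3,1,1,relu<con<8,3,3,1,1,tag1<SUBNET>>>>>>;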
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ template<typename> class tag
+ >
+ class mult_prev_
+ {
+ public:
+ const static unsigned long id = tag_id<tag>::id;
+
+ mult_prev_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ auto&& t1 = sub.get_output();
+ auto&& t2 = layer<tag>(sub).get_output();
+ output.set_size(std::max(t1.num_samples(),t2.num_samples()),
+ std::max(t1.k(),t2.k()),
+ std::max(t1.nr(),t2.nr()),
+ std::max(t1.nc(),t2.nc()));
+ tt::multiply_zero_padded(false, output, t1, t2);
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ auto&& t1 = sub.get_output();
+ auto&& t2 = layer<tag>(sub).get_output();
+ // The gradient just flows backwards to the two layers that forward()
+ // multiplied together.
+ tt::multiply_zero_padded(true, sub.get_gradient_input(), t2, gradient_input);
+ tt::multiply_zero_padded(true, layer<tag>(sub).get_gradient_input(), t1, gradient_input);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const mult_prev_& , std::ostream& out)
+ {
+ serialize("mult_prev_", out);
+ }
+
+ friend void deserialize(mult_prev_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "mult_prev_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::mult_prev_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const mult_prev_& item)
+ {
+ out << "mult_prev"<<id;
+ return out;
+ }
+
+ friend void to_xml(const mult_prev_& item, std::ostream& out)
+ {
+ out << "<mult_prev tag='"<<id<<"'/>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ };
+
+ template <
+ template<typename> class tag,
+ typename SUBNET
+ >
+ using mult_prev = add_layer<mult_prev_<tag>, SUBNET>;
+
+ template <typename SUBNET> using mult_prev1 = mult_prev<tag1, SUBNET>;
+ template <typename SUBNET> using mult_prev2 = mult_prev<tag2, SUBNET>;
+ template <typename SUBNET> using mult_prev3 = mult_prev<tag3, SUBNET>;
+ template <typename SUBNET> using mult_prev4 = mult_prev<tag4, SUBNET>;
+ template <typename SUBNET> using mult_prev5 = mult_prev<tag5, SUBNET>;
+ template <typename SUBNET> using mult_prev6 = mult_prev<tag6, SUBNET>;
+ template <typename SUBNET> using mult_prev7 = mult_prev<tag7, SUBNET>;
+ template <typename SUBNET> using mult_prev8 = mult_prev<tag8, SUBNET>;
+ template <typename SUBNET> using mult_prev9 = mult_prev<tag9, SUBNET>;
+ template <typename SUBNET> using mult_prev10 = mult_prev<tag10, SUBNET>;
+
+ using mult_prev1_ = mult_prev_<tag1>;
+ using mult_prev2_ = mult_prev_<tag2>;
+ using mult_prev3_ = mult_prev_<tag3>;
+ using mult_prev4_ = mult_prev_<tag4>;
+ using mult_prev5_ = mult_prev_<tag5>;
+ using mult_prev6_ = mult_prev_<tag6>;
+ using mult_prev7_ = mult_prev_<tag7>;
+ using mult_prev8_ = mult_prev_<tag8>;
+ using mult_prev9_ = mult_prev_<tag9>;
+ using mult_prev10_ = mult_prev_<tag10>;
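+
+ // A hedged gating sketch (layer choices are illustrative only): compute a sigmoid gate
+ // from the tagged tensor and multiply it, element-wise, with that same tensor.  For an
+ // exact element-wise gate the gate's dimensions should match the tagged tensor's.
+ //    template <typename SUBNET>
+ //    using gated = mult_prev1<sig<con<8,1,1,1,1,tag1<SUBNET>>>>;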
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ template<typename> class tag
+ >
+ class scale_
+ {
+ public:
+ const static unsigned long id = tag_id<tag>::id;
+
+ scale_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ auto&& scales = sub.get_output();
+ auto&& src = layer<tag>(sub).get_output();
+ DLIB_CASSERT(scales.num_samples() == src.num_samples() &&
+ scales.k() == src.k() &&
+ scales.nr() == 1 &&
+ scales.nc() == 1,
+ "scales.k(): " << scales.k() <<
+ "\nsrc.k(): " << src.k()
+ );
+
+ output.copy_size(src);
+ tt::scale_channels(false, output, src, scales);
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ auto&& scales = sub.get_output();
+ auto&& src = layer<tag>(sub).get_output();
+ // The gradient just flows backwards to the two layers that forward()
+ // read from.
+ tt::scale_channels(true, layer<tag>(sub).get_gradient_input(), gradient_input, scales);
+
+ if (reshape_src.num_samples() != src.num_samples())
+ {
+ reshape_scales = alias_tensor(src.num_samples()*src.k());
+ reshape_src = alias_tensor(src.num_samples()*src.k(),src.nr()*src.nc());
+ }
+
+ auto&& scales_grad = sub.get_gradient_input();
+ auto sgrad = reshape_scales(scales_grad);
+ tt::dot_prods(true, sgrad, reshape_src(src), reshape_src(gradient_input));
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const scale_& item, std::ostream& out)
+ {
+ serialize("scale_", out);
+ serialize(item.reshape_scales, out);
+ serialize(item.reshape_src, out);
+ }
+
+ friend void deserialize(scale_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "scale_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::scale_.");
+ deserialize(item.reshape_scales, in);
+ deserialize(item.reshape_src, in);
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const scale_& item)
+ {
+ out << "scale"<<id;
+ return out;
+ }
+
+ friend void to_xml(const scale_& item, std::ostream& out)
+ {
+ out << "<scale tag='"<<id<<"'/>\n";
+ }
+
+ private:
+ alias_tensor reshape_scales;
+ alias_tensor reshape_src;
+ resizable_tensor params;
+ };
+
+ template <
+ template<typename> class tag,
+ typename SUBNET
+ >
+ using scale = add_layer<scale_<tag>, SUBNET>;
+
+ template <typename SUBNET> using scale1 = scale<tag1, SUBNET>;
+ template <typename SUBNET> using scale2 = scale<tag2, SUBNET>;
+ template <typename SUBNET> using scale3 = scale<tag3, SUBNET>;
+ template <typename SUBNET> using scale4 = scale<tag4, SUBNET>;
+ template <typename SUBNET> using scale5 = scale<tag5, SUBNET>;
+ template <typename SUBNET> using scale6 = scale<tag6, SUBNET>;
+ template <typename SUBNET> using scale7 = scale<tag7, SUBNET>;
+ template <typename SUBNET> using scale8 = scale<tag8, SUBNET>;
+ template <typename SUBNET> using scale9 = scale<tag9, SUBNET>;
+ template <typename SUBNET> using scale10 = scale<tag10, SUBNET>;
+
+ using scale1_ = scale_<tag1>;
+ using scale2_ = scale_<tag2>;
+ using scale3_ = scale_<tag3>;
+ using scale4_ = scale_<tag4>;
+ using scale5_ = scale_<tag5>;
+ using scale6_ = scale_<tag6>;
+ using scale7_ = scale_<tag7>;
+ using scale8_ = scale_<tag8>;
+ using scale9_ = scale_<tag9>;
+ using scale10_ = scale_<tag10>;
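+
+ // A hedged squeeze-and-excitation style sketch (layer choices are illustrative only).
+ // The subnetwork feeding scale1 must output one scale per channel, i.e. nr()==nc()==1
+ // and a k() matching the tagged tensor, as asserted in forward().  An fc layer
+ // producing C outputs satisfies that shape:
+ //    template <long C, typename SUBNET>
+ //    using se_block = scale1<sig<fc<C,relu<fc<C/16,avg_pool_everything<tag1<SUBNET>>>>>>>;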
+
+// ----------------------------------------------------------------------------------------
+
+ class relu_
+ {
+ public:
+ relu_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::relu(output, input);
+ }
+
+ void backward_inplace(
+ const tensor& computed_output,
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor&
+ )
+ {
+ tt::relu_gradient(data_grad, computed_output, gradient_input);
+ }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const relu_& , std::ostream& out)
+ {
+ serialize("relu_", out);
+ }
+
+ friend void deserialize(relu_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "relu_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::relu_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const relu_& )
+ {
+ out << "relu";
+ return out;
+ }
+
+ friend void to_xml(const relu_& /*item*/, std::ostream& out)
+ {
+ out << "<relu/>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ };
+
+
+ template <typename SUBNET>
+ using relu = add_layer<relu_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class prelu_
+ {
+ public:
+ explicit prelu_(
+ float initial_param_value_ = 0.25
+ ) : initial_param_value(initial_param_value_)
+ {
+ }
+
+ float get_initial_param_value (
+ ) const { return initial_param_value; }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ params.set_size(1);
+ params = initial_param_value;
+ }
+
+ template <typename SUBNET>
+ void forward(
+ const SUBNET& sub,
+ resizable_tensor& data_output
+ )
+ {
+ data_output.copy_size(sub.get_output());
+ tt::prelu(data_output, sub.get_output(), params);
+ }
+
+ template <typename SUBNET>
+ void backward(
+ const tensor& gradient_input,
+ SUBNET& sub,
+ tensor& params_grad
+ )
+ {
+ tt::prelu_gradient(sub.get_gradient_input(), sub.get_output(),
+ gradient_input, params, params_grad);
+ }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const prelu_& item, std::ostream& out)
+ {
+ serialize("prelu_", out);
+ serialize(item.params, out);
+ serialize(item.initial_param_value, out);
+ }
+
+ friend void deserialize(prelu_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "prelu_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::prelu_.");
+ deserialize(item.params, in);
+ deserialize(item.initial_param_value, in);
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const prelu_& item)
+ {
+ out << "prelu\t ("
+ << "initial_param_value="<<item.initial_param_value
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const prelu_& item, std::ostream& out)
+ {
+ out << "<prelu initial_param_value='"<<item.initial_param_value<<"'>\n";
+ out << mat(item.params);
+ out << "</prelu>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ float initial_param_value;
+ };
+
+ template <typename SUBNET>
+ using prelu = add_layer<prelu_, SUBNET>;
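+
+ // A hedged note on choosing the initial parameter value: layer detail objects can be
+ // passed to a network's constructor, where they are forwarded to the matching layers in
+ // order, e.g. (the network type is illustrative):
+ //    net_type net(prelu_(0.1), prelu_(0.3));
+ // Alternatively, layer<i>(net).layer_details() gives access to each prelu_ object after
+ // construction.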
+
+// ----------------------------------------------------------------------------------------
+
+ class sig_
+ {
+ public:
+ sig_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::sigmoid(output, input);
+ }
+
+ void backward_inplace(
+ const tensor& computed_output,
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor&
+ )
+ {
+ tt::sigmoid_gradient(data_grad, computed_output, gradient_input);
+ }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const sig_& , std::ostream& out)
+ {
+ serialize("sig_", out);
+ }
+
+ friend void deserialize(sig_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "sig_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::sig_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const sig_& )
+ {
+ out << "sig";
+ return out;
+ }
+
+ friend void to_xml(const sig_& /*item*/, std::ostream& out)
+ {
+ out << "<sig/>\n";
+ }
+
+
+ private:
+ resizable_tensor params;
+ };
+
+
+ template <typename SUBNET>
+ using sig = add_layer<sig_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class htan_
+ {
+ public:
+ htan_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ inline dpoint map_input_to_output (const dpoint& p) const { return p; }
+ inline dpoint map_output_to_input (const dpoint& p) const { return p; }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::tanh(output, input);
+ }
+
+ void backward_inplace(
+ const tensor& computed_output,
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor&
+ )
+ {
+ tt::tanh_gradient(data_grad, computed_output, gradient_input);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const htan_& , std::ostream& out)
+ {
+ serialize("htan_", out);
+ }
+
+ friend void deserialize(htan_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "htan_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::htan_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const htan_& )
+ {
+ out << "htan";
+ return out;
+ }
+
+ friend void to_xml(const htan_& /*item*/, std::ostream& out)
+ {
+ out << "<htan/>\n";
+ }
+
+
+ private:
+ resizable_tensor params;
+ };
+
+
+ template <typename SUBNET>
+ using htan = add_layer<htan_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class softmax_
+ {
+ public:
+ softmax_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::softmax(output, input);
+ }
+
+ void backward_inplace(
+ const tensor& computed_output,
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor&
+ )
+ {
+ tt::softmax_gradient(data_grad, computed_output, gradient_input);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const softmax_& , std::ostream& out)
+ {
+ serialize("softmax_", out);
+ }
+
+ friend void deserialize(softmax_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "softmax_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const softmax_& )
+ {
+ out << "softmax";
+ return out;
+ }
+
+ friend void to_xml(const softmax_& /*item*/, std::ostream& out)
+ {
+ out << "<softmax/>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ };
+
+ template <typename SUBNET>
+ using softmax = add_layer<softmax_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ class softmax_all_
+ {
+ public:
+ softmax_all_()
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::softmax_all(output, input);
+ }
+
+ void backward_inplace(
+ const tensor& computed_output,
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor&
+ )
+ {
+ tt::softmax_all_gradient(data_grad, computed_output, gradient_input);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const softmax_all_& , std::ostream& out)
+ {
+ serialize("softmax_all_", out);
+ }
+
+ friend void deserialize(softmax_all_& , std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "softmax_all_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_all_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const softmax_all_& )
+ {
+ out << "softmax_all";
+ return out;
+ }
+
+ friend void to_xml(const softmax_all_& /*item*/, std::ostream& out)
+ {
+ out << "<softmax_all/>\n";
+ }
+
+ private:
+ resizable_tensor params;
+ };
+
+ template <typename SUBNET>
+ using softmax_all = add_layer<softmax_all_, SUBNET>;
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ template <template<typename> class TAG_TYPE, template<typename> class... TAG_TYPES>
+ struct concat_helper_impl{
+
+ constexpr static size_t tag_count() {return 1 + concat_helper_impl<TAG_TYPES...>::tag_count();}
+ static void list_tags(std::ostream& out)
+ {
+ out << tag_id<TAG_TYPE>::id << (tag_count() > 1 ? "," : "");
+ concat_helper_impl<TAG_TYPES...>::list_tags(out);
+ }
+
+ template<typename SUBNET>
+ static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
+ {
+ auto& t = layer<TAG_TYPE>(sub).get_output();
+ concat_helper_impl<TAG_TYPES...>::resize_out(out, sub, sum_k + t.k());
+ }
+ template<typename SUBNET>
+ static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
+ {
+ auto& t = layer<TAG_TYPE>(sub).get_output();
+ tt::copy_tensor(false, out, k_offset, t, 0, t.k());
+ k_offset += t.k();
+ concat_helper_impl<TAG_TYPES...>::concat(out, sub, k_offset);
+ }
+ template<typename SUBNET>
+ static void split(const tensor& input, SUBNET& sub, size_t k_offset)
+ {
+ auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
+ tt::copy_tensor(true, t, 0, input, k_offset, t.k());
+ k_offset += t.k();
+ concat_helper_impl<TAG_TYPES...>::split(input, sub, k_offset);
+ }
+ };
+ template <template<typename> class TAG_TYPE>
+ struct concat_helper_impl<TAG_TYPE>{
+ constexpr static size_t tag_count() {return 1;}
+ static void list_tags(std::ostream& out)
+ {
+ out << tag_id<TAG_TYPE>::id;
+ }
+
+ template<typename SUBNET>
+ static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
+ {
+ auto& t = layer<TAG_TYPE>(sub).get_output();
+ out.set_size(t.num_samples(), t.k() + sum_k, t.nr(), t.nc());
+ }
+ template<typename SUBNET>
+ static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
+ {
+ auto& t = layer<TAG_TYPE>(sub).get_output();
+ tt::copy_tensor(false, out, k_offset, t, 0, t.k());
+ }
+ template<typename SUBNET>
+ static void split(const tensor& input, SUBNET& sub, size_t k_offset)
+ {
+ auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
+ tt::copy_tensor(true, t, 0, input, k_offset, t.k());
+ }
+ };
+ }
+ // concat layer
+ template<
+ template<typename> class... TAG_TYPES
+ >
+ class concat_
+ {
+ static void list_tags(std::ostream& out) { impl::concat_helper_impl<TAG_TYPES...>::list_tags(out); }
+
+ public:
+ constexpr static size_t tag_count() { return impl::concat_helper_impl<TAG_TYPES...>::tag_count(); }
+
+ template <typename SUBNET>
+ void setup (const SUBNET&)
+ {
+ // do nothing
+ }
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ // The total depth of the result is the sum of the depths from all the tagged layers.
+ impl::concat_helper_impl<TAG_TYPES...>::resize_out(output, sub, 0);
+
+ // Copy the output of each tagged layer into its own slice of the result.
+ impl::concat_helper_impl<TAG_TYPES...>::concat(output, sub, 0);
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor&)
+ {
+ // The gradient is split into parts, one for each tagged layer.
+ impl::concat_helper_impl<TAG_TYPES...>::split(gradient_input, sub, 0);
+ }
+
+ dpoint map_input_to_output(dpoint p) const { return p; }
+ dpoint map_output_to_input(dpoint p) const { return p; }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const concat_& item, std::ostream& out)
+ {
+ serialize("concat_", out);
+ size_t count = tag_count();
+ serialize(count, out);
+ }
+
+ friend void deserialize(concat_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "concat_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::concat_.");
+ size_t count_tags;
+ deserialize(count_tags, in);
+ if (count_tags != tag_count())
+ throw serialization_error("Invalid count of tags "+ std::to_string(count_tags) +", expecting " +
+ std::to_string(tag_count()) +
+ ", found while deserializing dlib::concat_.");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const concat_& item)
+ {
+ out << "concat\t (";
+ list_tags(out);
+ out << ")";
+ return out;
+ }
+
+ friend void to_xml(const concat_& item, std::ostream& out)
+ {
+ out << "<concat tags='";
+ list_tags(out);
+ out << "'/>\n";
+ }
+
+ private:
+ resizable_tensor params; // unused
+ };
+
+
+ // concat layer definitions
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ typename SUBNET>
+ using concat2 = add_layer<concat_<TAG1, TAG2>, SUBNET>;
+
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ template<typename> class TAG3,
+ typename SUBNET>
+ using concat3 = add_layer<concat_<TAG1, TAG2, TAG3>, SUBNET>;
+
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ template<typename> class TAG3,
+ template<typename> class TAG4,
+ typename SUBNET>
+ using concat4 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4>, SUBNET>;
+
+ template <template<typename> class TAG1,
+ template<typename> class TAG2,
+ template<typename> class TAG3,
+ template<typename> class TAG4,
+ template<typename> class TAG5,
+ typename SUBNET>
+ using concat5 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4, TAG5>, SUBNET>;
+
+ // The inception layers use tags internally.  If the user also uses tags, conflicts are
+ // possible.  To avoid them, the following tags are reserved specifically for the
+ // inception layers.
+ template <typename SUBNET> using itag0 = add_tag_layer< 1000 + 0, SUBNET>;
+ template <typename SUBNET> using itag1 = add_tag_layer< 1000 + 1, SUBNET>;
+ template <typename SUBNET> using itag2 = add_tag_layer< 1000 + 2, SUBNET>;
+ template <typename SUBNET> using itag3 = add_tag_layer< 1000 + 3, SUBNET>;
+ template <typename SUBNET> using itag4 = add_tag_layer< 1000 + 4, SUBNET>;
+ template <typename SUBNET> using itag5 = add_tag_layer< 1000 + 5, SUBNET>;
+ // skip to inception input
+ template <typename SUBNET> using iskip = add_skip_layer< itag0, SUBNET>;
+
+ // Here are some templates that can be used to create inception layer groups.
+ template <template<typename>class B1,
+ template<typename>class B2,
+ typename SUBNET>
+ using inception2 = concat2<itag1, itag2, itag1<B1<iskip< itag2<B2< itag0<SUBNET>>>>>>>;
+
+ template <template<typename>class B1,
+ template<typename>class B2,
+ template<typename>class B3,
+ typename SUBNET>
+ using inception3 = concat3<itag1, itag2, itag3, itag1<B1<iskip< itag2<B2<iskip< itag3<B3< itag0<SUBNET>>>>>>>>>>;
+
+ template <template<typename>class B1,
+ template<typename>class B2,
+ template<typename>class B3,
+ template<typename>class B4,
+ typename SUBNET>
+ using inception4 = concat4<itag1, itag2, itag3, itag4,
+ itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4< itag0<SUBNET>>>>>>>>>>>>>;
+
+ template <template<typename>class B1,
+ template<typename>class B2,
+ template<typename>class B3,
+ template<typename>class B4,
+ template<typename>class B5,
+ typename SUBNET>
+ using inception5 = concat5<itag1, itag2, itag3, itag4, itag5,
+ itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4<iskip< itag5<B5< itag0<SUBNET>>>>>>>>>>>>>>>>;
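+
+ // A hedged usage sketch of the inception templates above (the branch definitions are
+ // illustrative only): each branch is a template taking a SUBNET, and the inceptionN
+ // template concatenates the branch outputs along the channel dimension.
+ //    template <typename SUBNET> using branch_1x1 = relu<con<10,1,1,1,1,SUBNET>>;
+ //    template <typename SUBNET> using branch_3x3 = relu<con<10,3,3,1,1,SUBNET>>;
+ //    template <typename SUBNET> using incept_a   = inception2<branch_1x1,branch_3x3,SUBNET>;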
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ const double DEFAULT_L2_NORM_EPS = 1e-5;
+
+ class l2normalize_
+ {
+ public:
+ explicit l2normalize_(
+ double eps_ = DEFAULT_L2_NORM_EPS
+ ) :
+ eps(eps_)
+ {
+ }
+
+ double get_eps() const { return eps; }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& /*sub*/)
+ {
+ }
+
+ void forward_inplace(const tensor& input, tensor& output)
+ {
+ tt::inverse_norms(norm, input, eps);
+ tt::scale_rows(output, input, norm);
+ }
+
+ void backward_inplace(
+ const tensor& computed_output,
+ const tensor& gradient_input,
+ tensor& data_grad,
+ tensor& /*params_grad*/
+ )
+ {
+ if (is_same_object(gradient_input, data_grad))
+ {
+ tt::dot_prods(temp, gradient_input, computed_output);
+ tt::scale_rows2(0, data_grad, gradient_input, computed_output, temp, norm);
+ }
+ else
+ {
+ tt::dot_prods(temp, gradient_input, computed_output);
+ tt::scale_rows2(1, data_grad, gradient_input, computed_output, temp, norm);
+ }
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const l2normalize_& item, std::ostream& out)
+ {
+ serialize("l2normalize_", out);
+ serialize(item.eps, out);
+ }
+
+ friend void deserialize(l2normalize_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "l2normalize_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::l2normalize_.");
+ deserialize(item.eps, in);
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const l2normalize_& item)
+ {
+ out << "l2normalize";
+ out << " eps="<<item.eps;
+ return out;
+ }
+
+ friend void to_xml(const l2normalize_& item, std::ostream& out)
+ {
+ out << "<l2normalize";
+ out << " eps='"<<item.eps<<"'";
+ out << "/>\n";
+ }
+ private:
+ double eps;
+
+ resizable_tensor params; // unused
+ // Here only to avoid reallocation and as a cache between forward/backward
+ // functions.
+ resizable_tensor norm;
+ resizable_tensor temp;
+ };
+
+ template <typename SUBNET>
+ using l2normalize = add_layer<l2normalize_, SUBNET>;
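+
+ // A hedged usage sketch (loss and layer choices are illustrative only): l2normalize is
+ // typically placed just below the loss to produce unit-length embedding vectors, e.g.
+ //    using embed_net = loss_metric<l2normalize<fc_no_bias<128,relu<fc<256,input<matrix<float>>>>>>>;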
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ long _offset,
+ long _k,
+ long _nr,
+ long _nc
+ >
+ class extract_
+ {
+ static_assert(_offset >= 0, "The offset must be >= 0.");
+ static_assert(_k > 0, "The number of channels must be > 0.");
+ static_assert(_nr > 0, "The number of rows must be > 0.");
+ static_assert(_nc > 0, "The number of columns must be > 0.");
+ public:
+ extract_(
+ )
+ {
+ }
+
+ template <typename SUBNET>
+ void setup (const SUBNET& sub)
+ {
+ DLIB_CASSERT((long)sub.get_output().size() >= sub.get_output().num_samples()*(_offset+_k*_nr*_nc),
+ "The region we are trying to extract is too big to fit inside the input tensor.");
+
+ aout = alias_tensor(sub.get_output().num_samples(), _k*_nr*_nc);
+ ain = alias_tensor(sub.get_output().num_samples(), sub.get_output().size()/sub.get_output().num_samples());
+ }
+
+ template <typename SUBNET>
+ void forward(const SUBNET& sub, resizable_tensor& output)
+ {
+ if (aout.num_samples() != sub.get_output().num_samples())
+ {
+ aout = alias_tensor(sub.get_output().num_samples(), _k*_nr*_nc);
+ ain = alias_tensor(sub.get_output().num_samples(), sub.get_output().size()/sub.get_output().num_samples());
+ }
+
+ output.set_size(sub.get_output().num_samples(), _k, _nr, _nc);
+ auto out = aout(output,0);
+ auto in = ain(sub.get_output(),0);
+ tt::copy_tensor(false, out, 0, in, _offset, _k*_nr*_nc);
+ }
+
+ template <typename SUBNET>
+ void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
+ {
+ auto out = ain(sub.get_gradient_input(),0);
+ auto in = aout(gradient_input,0);
+ tt::copy_tensor(true, out, _offset, in, 0, _k*_nr*_nc);
+ }
+
+ const tensor& get_layer_params() const { return params; }
+ tensor& get_layer_params() { return params; }
+
+ friend void serialize(const extract_& item, std::ostream& out)
+ {
+ serialize("extract_", out);
+ serialize(_offset, out);
+ serialize(_k, out);
+ serialize(_nr, out);
+ serialize(_nc, out);
+ }
+
+ friend void deserialize(extract_& item, std::istream& in)
+ {
+ std::string version;
+ deserialize(version, in);
+ if (version != "extract_")
+ throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::extract_.");
+
+ long offset;
+ long k;
+ long nr;
+ long nc;
+ deserialize(offset, in);
+ deserialize(k, in);
+ deserialize(nr, in);
+ deserialize(nc, in);
+
+ if (offset != _offset) throw serialization_error("Wrong offset found while deserializing dlib::extract_");
+ if (k != _k) throw serialization_error("Wrong k found while deserializing dlib::extract_");
+ if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::extract_");
+ if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::extract_");
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const extract_& item)
+ {
+ out << "extract\t ("
+ << "offset="<<_offset
+ << ", k="<<_k
+ << ", nr="<<_nr
+ << ", nc="<<_nc
+ << ")";
+ return out;
+ }
+
+ friend void to_xml(const extract_& item, std::ostream& out)
+ {
+ out << "<extract";
+ out << " offset='"<<_offset<<"'";
+ out << " k='"<<_k<<"'";
+ out << " nr='"<<_nr<<"'";
+ out << " nc='"<<_nc<<"'";
+ out << "/>\n";
+ }
+ private:
+ alias_tensor aout, ain;
+
+ resizable_tensor params; // unused
+ };
+
+ template <
+ long offset,
+ long k,
+ long nr,
+ long nc,
+ typename SUBNET
+ >
+ using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>;
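+
+ // Hedged examples of what the template arguments select (shapes are illustrative).
+ // Provided each sample of the underlying tensor holds enough values,
+ //    extract<2,4,1,1,SUBNET>
+ // yields a (num_samples,4,1,1) tensor containing elements 2..5 of each flattened
+ // sample, while
+ //    extract<0,1,8,8,SUBNET>
+ // reinterprets the first 64 values of each sample as a 1-channel 8x8 image.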
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DNn_LAYERS_H_
+
+