// Copyright (C) 2015 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_DNn_LAYERS_H_ #define DLIB_DNn_LAYERS_H_ #include "layers_abstract.h" #include "tensor.h" #include "core.h" #include #include #include "../rand.h" #include "../string.h" #include "tensor_tools.h" #include "../vectorstream.h" #include "utilities.h" #include namespace dlib { // ---------------------------------------------------------------------------------------- struct num_con_outputs { num_con_outputs(unsigned long n) : num_outputs(n) {} unsigned long num_outputs; }; template < long _num_filters, long _nr, long _nc, int _stride_y, int _stride_x, int _padding_y = _stride_y!=1? 0 : _nr/2, int _padding_x = _stride_x!=1? 0 : _nc/2 > class con_ { public: static_assert(_num_filters > 0, "The number of filters must be > 0"); static_assert(_nr >= 0, "The number of rows in a filter must be >= 0"); static_assert(_nc >= 0, "The number of columns in a filter must be >= 0"); static_assert(_stride_y > 0, "The filter stride must be > 0"); static_assert(_stride_x > 0, "The filter stride must be > 0"); static_assert(_nr==0 || (0 <= _padding_y && _padding_y < _nr), "The padding must be smaller than the filter size."); static_assert(_nc==0 || (0 <= _padding_x && _padding_x < _nc), "The padding must be smaller than the filter size."); static_assert(_nr!=0 || 0 == _padding_y, "If _nr==0 then the padding must be set to 0 as well."); static_assert(_nc!=0 || 0 == _padding_x, "If _nr==0 then the padding must be set to 0 as well."); con_( num_con_outputs o ) : learning_rate_multiplier(1), weight_decay_multiplier(1), bias_learning_rate_multiplier(1), bias_weight_decay_multiplier(0), num_filters_(o.num_outputs), padding_y_(_padding_y), padding_x_(_padding_x) { DLIB_CASSERT(num_filters_ > 0); } con_() : con_(num_con_outputs(_num_filters)) {} long num_filters() const { return num_filters_; } long nr() const { if (_nr==0) return filters.nr(); else return _nr; } long nc() const { if (_nc==0) return filters.nc(); else return _nc; } long stride_y() const { return _stride_y; } long stride_x() const { return _stride_x; } long padding_y() const { return padding_y_; } long padding_x() const { return padding_x_; } void set_num_filters(long num) { DLIB_CASSERT(num > 0); if (num != num_filters_) { DLIB_CASSERT(get_layer_params().size() == 0, "You can't change the number of filters in con_ if the parameter tensor has already been allocated."); num_filters_ = num; } } double get_learning_rate_multiplier () const { return learning_rate_multiplier; } double get_weight_decay_multiplier () const { return weight_decay_multiplier; } void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; } void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; } double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; } double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; } void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; } void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; } inline dpoint map_input_to_output ( dpoint p ) const { p.x() = (p.x()+padding_x()-nc()/2)/stride_x(); p.y() = (p.y()+padding_y()-nr()/2)/stride_y(); return p; } inline dpoint map_output_to_input ( dpoint p ) const { p.x() = p.x()*stride_x() - padding_x() + nc()/2; p.y() = p.y()*stride_y() - padding_y() + nr()/2; return p; } con_ ( const con_& item ) : 
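        // Note on the default padding above: when the stride is 1 the padding
        // defaults to half the filter size, otherwise it defaults to 0.  Assuming
        // the usual convolution output size formula,
        //     output_nr = 1 + (input_nr + 2*padding_y - filter_nr)/stride_y
        // (and likewise for columns), a con_<N,3,3,1,1> layer (3x3 filters,
        // stride 1, default padding 1) preserves the spatial size of its input,
        // while a con_<N,3,3,2,2> layer (default padding 0) roughly halves it.
        // The map_input_to_output()/map_output_to_input() helpers above apply the
        // same geometry to individual points.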
params(item.params), filters(item.filters), biases(item.biases), learning_rate_multiplier(item.learning_rate_multiplier), weight_decay_multiplier(item.weight_decay_multiplier), bias_learning_rate_multiplier(item.bias_learning_rate_multiplier), bias_weight_decay_multiplier(item.bias_weight_decay_multiplier), num_filters_(item.num_filters_), padding_y_(item.padding_y_), padding_x_(item.padding_x_) { // this->conv is non-copyable and basically stateless, so we have to write our // own copy to avoid trying to copy it and getting an error. } con_& operator= ( const con_& item ) { if (this == &item) return *this; // this->conv is non-copyable and basically stateless, so we have to write our // own copy to avoid trying to copy it and getting an error. params = item.params; filters = item.filters; biases = item.biases; padding_y_ = item.padding_y_; padding_x_ = item.padding_x_; learning_rate_multiplier = item.learning_rate_multiplier; weight_decay_multiplier = item.weight_decay_multiplier; bias_learning_rate_multiplier = item.bias_learning_rate_multiplier; bias_weight_decay_multiplier = item.bias_weight_decay_multiplier; num_filters_ = item.num_filters_; return *this; } template void setup (const SUBNET& sub) { const long filt_nr = _nr!=0 ? _nr : sub.get_output().nr(); const long filt_nc = _nc!=0 ? _nc : sub.get_output().nc(); long num_inputs = filt_nr*filt_nc*sub.get_output().k(); long num_outputs = num_filters_; // allocate params for the filters and also for the filter bias values. params.set_size(num_inputs*num_filters_ + num_filters_); dlib::rand rnd(std::rand()); randomize_parameters(params, num_inputs+num_outputs, rnd); filters = alias_tensor(num_filters_, sub.get_output().k(), filt_nr, filt_nc); biases = alias_tensor(1,num_filters_); // set the initial bias values to zero biases(params,filters.size()) = 0; } template void forward(const SUBNET& sub, resizable_tensor& output) { conv.setup(sub.get_output(), filters(params,0), _stride_y, _stride_x, padding_y_, padding_x_); conv(false, output, sub.get_output(), filters(params,0)); tt::add(1,output,1,biases(params,filters.size())); } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad) { conv.get_gradient_for_data (true, gradient_input, filters(params,0), sub.get_gradient_input()); // no dpoint computing the parameter gradients if they won't be used. 
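            // The parameter tensor allocated in setup() packs all the filters
            // first, followed by one bias per filter, and is initialized by
            // randomize_parameters() with a scale based on the fan-in plus fan-out
            // (Glorot style).  Skipping the filter and bias gradients below when
            // learning_rate_multiplier is 0 is what makes "frozen" convolution
            // layers cheap.
            //
            // A rough usage sketch (the surrounding layer and input types here are
            // illustrative, not defined in this file): convolutions are normally
            // composed through the con alias defined after this class, e.g.
            //
            //     using net_type = loss_multiclass_log<
            //                          fc<10,
            //                          relu<con<16,5,5,2,2,
            //                          input<matrix<unsigned char>>>>>>;
            //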
if (learning_rate_multiplier != 0) { auto filt = filters(params_grad,0); conv.get_gradient_for_filters (false, gradient_input, sub.get_output(), filt); auto b = biases(params_grad, filters.size()); tt::assign_conv_bias_gradient(b, gradient_input); } } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const con_& item, std::ostream& out) { serialize("con_4", out); serialize(item.params, out); serialize(item.num_filters_, out); serialize(_nr, out); serialize(_nc, out); serialize(_stride_y, out); serialize(_stride_x, out); serialize(item.padding_y_, out); serialize(item.padding_x_, out); serialize(item.filters, out); serialize(item.biases, out); serialize(item.learning_rate_multiplier, out); serialize(item.weight_decay_multiplier, out); serialize(item.bias_learning_rate_multiplier, out); serialize(item.bias_weight_decay_multiplier, out); } friend void deserialize(con_& item, std::istream& in) { std::string version; deserialize(version, in); long nr; long nc; int stride_y; int stride_x; if (version == "con_4") { deserialize(item.params, in); deserialize(item.num_filters_, in); deserialize(nr, in); deserialize(nc, in); deserialize(stride_y, in); deserialize(stride_x, in); deserialize(item.padding_y_, in); deserialize(item.padding_x_, in); deserialize(item.filters, in); deserialize(item.biases, in); deserialize(item.learning_rate_multiplier, in); deserialize(item.weight_decay_multiplier, in); deserialize(item.bias_learning_rate_multiplier, in); deserialize(item.bias_weight_decay_multiplier, in); if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::con_"); if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::con_"); if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::con_"); if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::con_"); if (stride_y != _stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::con_"); if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::con_"); } else { throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::con_."); } } friend std::ostream& operator<<(std::ostream& out, const con_& item) { out << "con\t (" << "num_filters="<\n"; out << mat(item.params); out << ""; } private: resizable_tensor params; alias_tensor filters, biases; tt::tensor_conv conv; double learning_rate_multiplier; double weight_decay_multiplier; double bias_learning_rate_multiplier; double bias_weight_decay_multiplier; long num_filters_; // These are here only because older versions of con (which you might encounter // serialized to disk) used different padding settings. int padding_y_; int padding_x_; }; template < long num_filters, long nr, long nc, int stride_y, int stride_x, typename SUBNET > using con = add_layer, SUBNET>; // ---------------------------------------------------------------------------------------- template < long _num_filters, long _nr, long _nc, int _stride_y, int _stride_x, int _padding_y = _stride_y!=1? 0 : _nr/2, int _padding_x = _stride_x!=1? 
0 : _nc/2 > class cont_ { public: static_assert(_num_filters > 0, "The number of filters must be > 0"); static_assert(_nr > 0, "The number of rows in a filter must be > 0"); static_assert(_nc > 0, "The number of columns in a filter must be > 0"); static_assert(_stride_y > 0, "The filter stride must be > 0"); static_assert(_stride_x > 0, "The filter stride must be > 0"); static_assert(0 <= _padding_y && _padding_y < _nr, "The padding must be smaller than the filter size."); static_assert(0 <= _padding_x && _padding_x < _nc, "The padding must be smaller than the filter size."); cont_( num_con_outputs o ) : learning_rate_multiplier(1), weight_decay_multiplier(1), bias_learning_rate_multiplier(1), bias_weight_decay_multiplier(0), num_filters_(o.num_outputs), padding_y_(_padding_y), padding_x_(_padding_x) { DLIB_CASSERT(num_filters_ > 0); } cont_() : cont_(num_con_outputs(_num_filters)) {} long num_filters() const { return num_filters_; } long nr() const { return _nr; } long nc() const { return _nc; } long stride_y() const { return _stride_y; } long stride_x() const { return _stride_x; } long padding_y() const { return padding_y_; } long padding_x() const { return padding_x_; } void set_num_filters(long num) { DLIB_CASSERT(num > 0); if (num != num_filters_) { DLIB_CASSERT(get_layer_params().size() == 0, "You can't change the number of filters in cont_ if the parameter tensor has already been allocated."); num_filters_ = num; } } double get_learning_rate_multiplier () const { return learning_rate_multiplier; } double get_weight_decay_multiplier () const { return weight_decay_multiplier; } void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; } void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; } double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; } double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; } void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; } void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; } inline dpoint map_output_to_input ( dpoint p ) const { p.x() = (p.x()+padding_x()-nc()/2)/stride_x(); p.y() = (p.y()+padding_y()-nr()/2)/stride_y(); return p; } inline dpoint map_input_to_output ( dpoint p ) const { p.x() = p.x()*stride_x() - padding_x() + nc()/2; p.y() = p.y()*stride_y() - padding_y() + nr()/2; return p; } cont_ ( const cont_& item ) : params(item.params), filters(item.filters), biases(item.biases), learning_rate_multiplier(item.learning_rate_multiplier), weight_decay_multiplier(item.weight_decay_multiplier), bias_learning_rate_multiplier(item.bias_learning_rate_multiplier), bias_weight_decay_multiplier(item.bias_weight_decay_multiplier), num_filters_(item.num_filters_), padding_y_(item.padding_y_), padding_x_(item.padding_x_) { // this->conv is non-copyable and basically stateless, so we have to write our // own copy to avoid trying to copy it and getting an error. } cont_& operator= ( const cont_& item ) { if (this == &item) return *this; // this->conv is non-copyable and basically stateless, so we have to write our // own copy to avoid trying to copy it and getting an error. 
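            // Note that, compared to con_, the point mapping functions above are
            // intentionally swapped: map_output_to_input() here uses the formula
            // that con_ uses for map_input_to_output() and vice versa, because this
            // layer performs the transpose of a convolution and therefore maps
            // coordinates in the opposite direction.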
params = item.params; filters = item.filters; biases = item.biases; padding_y_ = item.padding_y_; padding_x_ = item.padding_x_; learning_rate_multiplier = item.learning_rate_multiplier; weight_decay_multiplier = item.weight_decay_multiplier; bias_learning_rate_multiplier = item.bias_learning_rate_multiplier; bias_weight_decay_multiplier = item.bias_weight_decay_multiplier; num_filters_ = item.num_filters_; return *this; } template void setup (const SUBNET& sub) { long num_inputs = _nr*_nc*sub.get_output().k(); long num_outputs = num_filters_; // allocate params for the filters and also for the filter bias values. params.set_size(num_inputs*num_filters_ + num_filters_); dlib::rand rnd(std::rand()); randomize_parameters(params, num_inputs+num_outputs, rnd); filters = alias_tensor(sub.get_output().k(), num_filters_, _nr, _nc); biases = alias_tensor(1,num_filters_); // set the initial bias values to zero biases(params,filters.size()) = 0; } template void forward(const SUBNET& sub, resizable_tensor& output) { auto filt = filters(params,0); unsigned int gnr = _stride_y * (sub.get_output().nr() - 1) + filt.nr() - 2 * padding_y_; unsigned int gnc = _stride_x * (sub.get_output().nc() - 1) + filt.nc() - 2 * padding_x_; unsigned int gnsamps = sub.get_output().num_samples(); unsigned int gk = filt.k(); output.set_size(gnsamps,gk,gnr,gnc); conv.setup(output,filt,_stride_y,_stride_x,padding_y_,padding_x_); conv.get_gradient_for_data(false, sub.get_output(),filt,output); tt::add(1,output,1,biases(params,filters.size())); } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad) { auto filt = filters(params,0); conv(true, sub.get_gradient_input(),gradient_input, filt); // no point computing the parameter gradients if they won't be used. 
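            // The forward() routine above is what makes this a transposed
            // convolution: it produces its output by running the *data gradient*
            // pass of an ordinary convolution, so the output size is
            //     output_nr = stride_y*(input_nr - 1) + filter_nr - 2*padding_y
            // (and likewise for columns).  For example, a cont_<N,2,2,2,2> layer
            // (2x2 filters, stride 2, default padding 0) exactly doubles the
            // spatial dimensions of its input, which is why this layer is commonly
            // used for learned upsampling.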
if (learning_rate_multiplier != 0) { auto filt = filters(params_grad,0); conv.get_gradient_for_filters (false, sub.get_output(),gradient_input, filt); auto b = biases(params_grad, filters.size()); tt::assign_conv_bias_gradient(b, gradient_input); } } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const cont_& item, std::ostream& out) { serialize("cont_1", out); serialize(item.params, out); serialize(item.num_filters_, out); serialize(_nr, out); serialize(_nc, out); serialize(_stride_y, out); serialize(_stride_x, out); serialize(item.padding_y_, out); serialize(item.padding_x_, out); serialize(item.filters, out); serialize(item.biases, out); serialize(item.learning_rate_multiplier, out); serialize(item.weight_decay_multiplier, out); serialize(item.bias_learning_rate_multiplier, out); serialize(item.bias_weight_decay_multiplier, out); } friend void deserialize(cont_& item, std::istream& in) { std::string version; deserialize(version, in); long nr; long nc; int stride_y; int stride_x; if (version == "cont_1") { deserialize(item.params, in); deserialize(item.num_filters_, in); deserialize(nr, in); deserialize(nc, in); deserialize(stride_y, in); deserialize(stride_x, in); deserialize(item.padding_y_, in); deserialize(item.padding_x_, in); deserialize(item.filters, in); deserialize(item.biases, in); deserialize(item.learning_rate_multiplier, in); deserialize(item.weight_decay_multiplier, in); deserialize(item.bias_learning_rate_multiplier, in); deserialize(item.bias_weight_decay_multiplier, in); if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::con_"); if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::con_"); if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::con_"); if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::con_"); if (stride_y != _stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::con_"); if (stride_x != _stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::con_"); } else { throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::con_."); } } friend std::ostream& operator<<(std::ostream& out, const cont_& item) { out << "cont\t (" << "num_filters="<\n"; out << mat(item.params); out << ""; } private: resizable_tensor params; alias_tensor filters, biases; tt::tensor_conv conv; double learning_rate_multiplier; double weight_decay_multiplier; double bias_learning_rate_multiplier; double bias_weight_decay_multiplier; long num_filters_; int padding_y_; int padding_x_; }; template < long num_filters, long nr, long nc, int stride_y, int stride_x, typename SUBNET > using cont = add_layer, SUBNET>; // ---------------------------------------------------------------------------------------- template < int scale_y, int scale_x > class upsample_ { public: static_assert(scale_y >= 1, "upsampling scale factor can't be less than 1."); static_assert(scale_x >= 1, "upsampling scale factor can't be less than 1."); upsample_() { } template void setup (const SUBNET& /*sub*/) { } template void forward(const SUBNET& sub, resizable_tensor& output) { output.set_size( sub.get_output().num_samples(), sub.get_output().k(), scale_y*sub.get_output().nr(), scale_x*sub.get_output().nc()); tt::resize_bilinear(output, sub.get_output()); } template void 
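        // upsample_ simply resizes each channel with bilinear interpolation: the
        // output is scale_y*nr() by scale_x*nc(), and the backward pass routes the
        // gradient through tt::resize_bilinear_gradient().  The single-argument
        // upsample alias defined below applies the same integer factor to both
        // dimensions, so, for instance, upsample<2, SUBNET> doubles the rows and
        // columns of whatever SUBNET produces.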
backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/) { tt::resize_bilinear_gradient(sub.get_gradient_input(), gradient_input); } inline dpoint map_input_to_output (dpoint p) const { p.x() = p.x()*scale_x; p.y() = p.y()*scale_y; return p; } inline dpoint map_output_to_input (dpoint p) const { p.x() = p.x()/scale_x; p.y() = p.y()/scale_y; return p; } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const upsample_& , std::ostream& out) { serialize("upsample_", out); serialize(scale_y, out); serialize(scale_x, out); } friend void deserialize(upsample_& , std::istream& in) { std::string version; deserialize(version, in); if (version != "upsample_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::upsample_."); int _scale_y; int _scale_x; deserialize(_scale_y, in); deserialize(_scale_x, in); if (_scale_y != scale_y || _scale_x != scale_x) throw serialization_error("Wrong scale found while deserializing dlib::upsample_"); } friend std::ostream& operator<<(std::ostream& out, const upsample_& ) { out << "upsample\t (" << "scale_y="<\n"; } private: resizable_tensor params; }; template < int scale, typename SUBNET > using upsample = add_layer, SUBNET>; // ---------------------------------------------------------------------------------------- template < long _nr, long _nc, int _stride_y, int _stride_x, int _padding_y = _stride_y!=1? 0 : _nr/2, int _padding_x = _stride_x!=1? 0 : _nc/2 > class max_pool_ { static_assert(_nr >= 0, "The number of rows in a filter must be >= 0"); static_assert(_nc >= 0, "The number of columns in a filter must be >= 0"); static_assert(_stride_y > 0, "The filter stride must be > 0"); static_assert(_stride_x > 0, "The filter stride must be > 0"); static_assert(0 <= _padding_y && ((_nr==0 && _padding_y == 0) || (_nr!=0 && _padding_y < _nr)), "The padding must be smaller than the filter size, unless the filters size is 0."); static_assert(0 <= _padding_x && ((_nc==0 && _padding_x == 0) || (_nc!=0 && _padding_x < _nc)), "The padding must be smaller than the filter size, unless the filters size is 0."); public: max_pool_( ) : padding_y_(_padding_y), padding_x_(_padding_x) {} long nr() const { return _nr; } long nc() const { return _nc; } long stride_y() const { return _stride_y; } long stride_x() const { return _stride_x; } long padding_y() const { return padding_y_; } long padding_x() const { return padding_x_; } inline dpoint map_input_to_output ( dpoint p ) const { p.x() = (p.x()+padding_x()-nc()/2)/stride_x(); p.y() = (p.y()+padding_y()-nr()/2)/stride_y(); return p; } inline dpoint map_output_to_input ( dpoint p ) const { p.x() = p.x()*stride_x() - padding_x() + nc()/2; p.y() = p.y()*stride_y() - padding_y() + nr()/2; return p; } max_pool_ ( const max_pool_& item ) : padding_y_(item.padding_y_), padding_x_(item.padding_x_) { // this->mp is non-copyable so we have to write our own copy to avoid trying to // copy it and getting an error. } max_pool_& operator= ( const max_pool_& item ) { if (this == &item) return *this; padding_y_ = item.padding_y_; padding_x_ = item.padding_x_; // this->mp is non-copyable so we have to write our own copy to avoid trying to // copy it and getting an error. 
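            // Pooling windows follow the same geometry conventions as con_: the
            // default padding is filter_size/2 when the stride is 1 and 0
            // otherwise, and a filter size of 0 means "pool over the entire
            // spatial extent of the input", which is how the max_pool_everything
            // and avg_pool_everything aliases implement global pooling.  A typical
            // downsampling stage looks roughly like
            //     max_pool<3,3,2,2, relu<con<32,3,3,1,1, SUBNET>>>
            // (an illustrative sketch using the aliases declared in this file).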
return *this; } template void setup (const SUBNET& /*sub*/) { } template void forward(const SUBNET& sub, resizable_tensor& output) { mp.setup_max_pooling(_nr!=0?_nr:sub.get_output().nr(), _nc!=0?_nc:sub.get_output().nc(), _stride_y, _stride_x, padding_y_, padding_x_); mp(output, sub.get_output()); } template void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/) { mp.setup_max_pooling(_nr!=0?_nr:sub.get_output().nr(), _nc!=0?_nc:sub.get_output().nc(), _stride_y, _stride_x, padding_y_, padding_x_); mp.get_gradient(gradient_input, computed_output, sub.get_output(), sub.get_gradient_input()); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const max_pool_& item, std::ostream& out) { serialize("max_pool_2", out); serialize(_nr, out); serialize(_nc, out); serialize(_stride_y, out); serialize(_stride_x, out); serialize(item.padding_y_, out); serialize(item.padding_x_, out); } friend void deserialize(max_pool_& item, std::istream& in) { std::string version; deserialize(version, in); long nr; long nc; int stride_y; int stride_x; if (version == "max_pool_2") { deserialize(nr, in); deserialize(nc, in); deserialize(stride_y, in); deserialize(stride_x, in); deserialize(item.padding_y_, in); deserialize(item.padding_x_, in); } else { throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::max_pool_."); } if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::max_pool_"); if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::max_pool_"); if (_nr != nr) throw serialization_error("Wrong nr found while deserializing dlib::max_pool_"); if (_nc != nc) throw serialization_error("Wrong nc found while deserializing dlib::max_pool_"); if (_stride_y != stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::max_pool_"); if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::max_pool_"); } friend std::ostream& operator<<(std::ostream& out, const max_pool_& item) { out << "max_pool (" << "nr="<<_nr << ", nc="<<_nc << ", stride_y="<<_stride_y << ", stride_x="<<_stride_x << ", padding_y="<\n"; } private: tt::pooling mp; resizable_tensor params; int padding_y_; int padding_x_; }; template < long nr, long nc, int stride_y, int stride_x, typename SUBNET > using max_pool = add_layer, SUBNET>; template < typename SUBNET > using max_pool_everything = add_layer, SUBNET>; // ---------------------------------------------------------------------------------------- template < long _nr, long _nc, int _stride_y, int _stride_x, int _padding_y = _stride_y!=1? 0 : _nr/2, int _padding_x = _stride_x!=1? 
0 : _nc/2 > class avg_pool_ { public: static_assert(_nr >= 0, "The number of rows in a filter must be >= 0"); static_assert(_nc >= 0, "The number of columns in a filter must be >= 0"); static_assert(_stride_y > 0, "The filter stride must be > 0"); static_assert(_stride_x > 0, "The filter stride must be > 0"); static_assert(0 <= _padding_y && ((_nr==0 && _padding_y == 0) || (_nr!=0 && _padding_y < _nr)), "The padding must be smaller than the filter size, unless the filters size is 0."); static_assert(0 <= _padding_x && ((_nc==0 && _padding_x == 0) || (_nc!=0 && _padding_x < _nc)), "The padding must be smaller than the filter size, unless the filters size is 0."); avg_pool_( ) : padding_y_(_padding_y), padding_x_(_padding_x) {} long nr() const { return _nr; } long nc() const { return _nc; } long stride_y() const { return _stride_y; } long stride_x() const { return _stride_x; } long padding_y() const { return padding_y_; } long padding_x() const { return padding_x_; } inline dpoint map_input_to_output ( dpoint p ) const { p.x() = (p.x()+padding_x()-nc()/2)/stride_x(); p.y() = (p.y()+padding_y()-nr()/2)/stride_y(); return p; } inline dpoint map_output_to_input ( dpoint p ) const { p.x() = p.x()*stride_x() - padding_x() + nc()/2; p.y() = p.y()*stride_y() - padding_y() + nr()/2; return p; } avg_pool_ ( const avg_pool_& item ) : padding_y_(item.padding_y_), padding_x_(item.padding_x_) { // this->ap is non-copyable so we have to write our own copy to avoid trying to // copy it and getting an error. } avg_pool_& operator= ( const avg_pool_& item ) { if (this == &item) return *this; padding_y_ = item.padding_y_; padding_x_ = item.padding_x_; // this->ap is non-copyable so we have to write our own copy to avoid trying to // copy it and getting an error. return *this; } template void setup (const SUBNET& /*sub*/) { } template void forward(const SUBNET& sub, resizable_tensor& output) { ap.setup_avg_pooling(_nr!=0?_nr:sub.get_output().nr(), _nc!=0?_nc:sub.get_output().nc(), _stride_y, _stride_x, padding_y_, padding_x_); ap(output, sub.get_output()); } template void backward(const tensor& computed_output, const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/) { ap.setup_avg_pooling(_nr!=0?_nr:sub.get_output().nr(), _nc!=0?_nc:sub.get_output().nc(), _stride_y, _stride_x, padding_y_, padding_x_); ap.get_gradient(gradient_input, computed_output, sub.get_output(), sub.get_gradient_input()); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const avg_pool_& item, std::ostream& out) { serialize("avg_pool_2", out); serialize(_nr, out); serialize(_nc, out); serialize(_stride_y, out); serialize(_stride_x, out); serialize(item.padding_y_, out); serialize(item.padding_x_, out); } friend void deserialize(avg_pool_& item, std::istream& in) { std::string version; deserialize(version, in); long nr; long nc; int stride_y; int stride_x; if (version == "avg_pool_2") { deserialize(nr, in); deserialize(nc, in); deserialize(stride_y, in); deserialize(stride_x, in); deserialize(item.padding_y_, in); deserialize(item.padding_x_, in); } else { throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::avg_pool_."); } if (item.padding_y_ != _padding_y) throw serialization_error("Wrong padding_y found while deserializing dlib::avg_pool_"); if (item.padding_x_ != _padding_x) throw serialization_error("Wrong padding_x found while deserializing dlib::avg_pool_"); if (_nr != nr) throw serialization_error("Wrong 
nr found while deserializing dlib::avg_pool_"); if (_nc != nc) throw serialization_error("Wrong nc found while deserializing dlib::avg_pool_"); if (_stride_y != stride_y) throw serialization_error("Wrong stride_y found while deserializing dlib::avg_pool_"); if (_stride_x != stride_x) throw serialization_error("Wrong stride_x found while deserializing dlib::avg_pool_"); } friend std::ostream& operator<<(std::ostream& out, const avg_pool_& item) { out << "avg_pool (" << "nr="<<_nr << ", nc="<<_nc << ", stride_y="<<_stride_y << ", stride_x="<<_stride_x << ", padding_y="<\n"; } private: tt::pooling ap; resizable_tensor params; int padding_y_; int padding_x_; }; template < long nr, long nc, int stride_y, int stride_x, typename SUBNET > using avg_pool = add_layer, SUBNET>; template < typename SUBNET > using avg_pool_everything = add_layer, SUBNET>; // ---------------------------------------------------------------------------------------- enum layer_mode { CONV_MODE = 0, FC_MODE = 1 }; const double DEFAULT_BATCH_NORM_EPS = 0.0001; template < layer_mode mode > class bn_ { public: explicit bn_( unsigned long window_size, double eps_ = DEFAULT_BATCH_NORM_EPS ) : num_updates(0), running_stats_window_size(window_size), learning_rate_multiplier(1), weight_decay_multiplier(0), bias_learning_rate_multiplier(1), bias_weight_decay_multiplier(1), eps(eps_) { DLIB_CASSERT(window_size > 0, "The batch normalization running stats window size can't be 0."); } bn_() : bn_(100) {} layer_mode get_mode() const { return mode; } unsigned long get_running_stats_window_size () const { return running_stats_window_size; } void set_running_stats_window_size (unsigned long new_window_size ) { DLIB_CASSERT(new_window_size > 0, "The batch normalization running stats window size can't be 0."); running_stats_window_size = new_window_size; } double get_eps() const { return eps; } double get_learning_rate_multiplier () const { return learning_rate_multiplier; } double get_weight_decay_multiplier () const { return weight_decay_multiplier; } void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; } void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; } double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; } double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; } void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; } void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; } inline dpoint map_input_to_output (const dpoint& p) const { return p; } inline dpoint map_output_to_input (const dpoint& p) const { return p; } template void setup (const SUBNET& sub) { if (mode == FC_MODE) { gamma = alias_tensor(1, sub.get_output().k(), sub.get_output().nr(), sub.get_output().nc()); } else { gamma = alias_tensor(1, sub.get_output().k()); } beta = gamma; params.set_size(gamma.size()+beta.size()); gamma(params,0) = 1; beta(params,gamma.size()) = 0; running_means.copy_size(gamma(params,0)); running_variances.copy_size(gamma(params,0)); running_means = 0; running_variances = 1; num_updates = 0; } template void forward(const SUBNET& sub, resizable_tensor& output) { auto g = gamma(params,0); auto b = beta(params,gamma.size()); if (sub.get_output().num_samples() > 1) { const double decay = 1.0 - num_updates/(num_updates+1.0); ++num_updates; if (num_updates > running_stats_window_size) num_updates = running_stats_window_size; if (mode == FC_MODE) 
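                    // With decay computed as above, each new batch's statistics
                    // effectively get a weight of 1/(num_updates+1) in the running
                    // means/variances.  Because num_updates is capped at
                    // running_stats_window_size, this behaves like a plain
                    // cumulative average for roughly the first window_size
                    // mini-batches and then like an exponential moving average with
                    // weight 1/(window_size+1).  Also note that forward() only
                    // updates these statistics when the batch has more than one
                    // sample; otherwise it uses the inference-mode scaling below.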
tt::batch_normalize(eps, output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b); else tt::batch_normalize_conv(eps, output, means, invstds, decay, running_means, running_variances, sub.get_output(), g, b); } else // we are running in testing mode so we just linearly scale the input tensor. { if (mode == FC_MODE) tt::batch_normalize_inference(eps, output, sub.get_output(), g, b, running_means, running_variances); else tt::batch_normalize_conv_inference(eps, output, sub.get_output(), g, b, running_means, running_variances); } } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad) { auto g = gamma(params,0); auto g_grad = gamma(params_grad, 0); auto b_grad = beta(params_grad, gamma.size()); if (mode == FC_MODE) tt::batch_normalize_gradient(eps, gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad ); else tt::batch_normalize_conv_gradient(eps, gradient_input, means, invstds, sub.get_output(), g, sub.get_gradient_input(), g_grad, b_grad ); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const bn_& item, std::ostream& out) { if (mode == CONV_MODE) serialize("bn_con2", out); else // if FC_MODE serialize("bn_fc2", out); serialize(item.params, out); serialize(item.gamma, out); serialize(item.beta, out); serialize(item.means, out); serialize(item.invstds, out); serialize(item.running_means, out); serialize(item.running_variances, out); serialize(item.num_updates, out); serialize(item.running_stats_window_size, out); serialize(item.learning_rate_multiplier, out); serialize(item.weight_decay_multiplier, out); serialize(item.bias_learning_rate_multiplier, out); serialize(item.bias_weight_decay_multiplier, out); serialize(item.eps, out); } friend void deserialize(bn_& item, std::istream& in) { std::string version; deserialize(version, in); if (mode == CONV_MODE) { if (version != "bn_con2") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::bn_."); } else // must be in FC_MODE { if (version != "bn_fc2") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::bn_."); } deserialize(item.params, in); deserialize(item.gamma, in); deserialize(item.beta, in); deserialize(item.means, in); deserialize(item.invstds, in); deserialize(item.running_means, in); deserialize(item.running_variances, in); deserialize(item.num_updates, in); deserialize(item.running_stats_window_size, in); deserialize(item.learning_rate_multiplier, in); deserialize(item.weight_decay_multiplier, in); deserialize(item.bias_learning_rate_multiplier, in); deserialize(item.bias_weight_decay_multiplier, in); deserialize(item.eps, in); } friend std::ostream& operator<<(std::ostream& out, const bn_& item) { if (mode == CONV_MODE) out << "bn_con "; else out << "bn_fc "; out << " eps="<\n"; out << mat(item.params); if (mode==CONV_MODE) out << "\n"; else out << "\n"; } private: friend class affine_; resizable_tensor params; alias_tensor gamma, beta; resizable_tensor means, running_means; resizable_tensor invstds, running_variances; unsigned long num_updates; unsigned long running_stats_window_size; double learning_rate_multiplier; double weight_decay_multiplier; double bias_learning_rate_multiplier; double bias_weight_decay_multiplier; double eps; }; template using bn_con = add_layer, SUBNET>; template using bn_fc = add_layer, SUBNET>; // 
---------------------------------------------------------------------------------------- namespace impl { class visitor_bn_running_stats_window_size { public: visitor_bn_running_stats_window_size(unsigned long new_window_size_) : new_window_size(new_window_size_) {} template void set_window_size(T&) const { // ignore other layer detail types } template < layer_mode mode > void set_window_size(bn_& l) const { l.set_running_stats_window_size(new_window_size); } template void operator()(size_t , input_layer_type& ) const { // ignore other layers } template void operator()(size_t , add_layer& l) const { set_window_size(l.layer_details()); } private: unsigned long new_window_size; }; } template void set_all_bn_running_stats_window_sizes ( net_type& net, unsigned long new_window_size ) { visit_layers(net, impl::visitor_bn_running_stats_window_size(new_window_size)); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- enum fc_bias_mode { FC_HAS_BIAS = 0, FC_NO_BIAS = 1 }; struct num_fc_outputs { num_fc_outputs(unsigned long n) : num_outputs(n) {} unsigned long num_outputs; }; template < unsigned long num_outputs_, fc_bias_mode bias_mode > class fc_ { static_assert(num_outputs_ > 0, "The number of outputs from a fc_ layer must be > 0"); public: fc_(num_fc_outputs o) : num_outputs(o.num_outputs), num_inputs(0), learning_rate_multiplier(1), weight_decay_multiplier(1), bias_learning_rate_multiplier(1), bias_weight_decay_multiplier(0) {} fc_() : fc_(num_fc_outputs(num_outputs_)) {} double get_learning_rate_multiplier () const { return learning_rate_multiplier; } double get_weight_decay_multiplier () const { return weight_decay_multiplier; } void set_learning_rate_multiplier(double val) { learning_rate_multiplier = val; } void set_weight_decay_multiplier(double val) { weight_decay_multiplier = val; } double get_bias_learning_rate_multiplier () const { return bias_learning_rate_multiplier; } double get_bias_weight_decay_multiplier () const { return bias_weight_decay_multiplier; } void set_bias_learning_rate_multiplier(double val) { bias_learning_rate_multiplier = val; } void set_bias_weight_decay_multiplier(double val) { bias_weight_decay_multiplier = val; } unsigned long get_num_outputs ( ) const { return num_outputs; } void set_num_outputs(long num) { DLIB_CASSERT(num > 0); if (num != (long)num_outputs) { DLIB_CASSERT(get_layer_params().size() == 0, "You can't change the number of filters in fc_ if the parameter tensor has already been allocated."); num_outputs = num; } } fc_bias_mode get_bias_mode ( ) const { return bias_mode; } template void setup (const SUBNET& sub) { num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k(); if (bias_mode == FC_HAS_BIAS) params.set_size(num_inputs+1, num_outputs); else params.set_size(num_inputs, num_outputs); dlib::rand rnd(std::rand()); randomize_parameters(params, num_inputs+num_outputs, rnd); weights = alias_tensor(num_inputs, num_outputs); if (bias_mode == FC_HAS_BIAS) { biases = alias_tensor(1,num_outputs); // set the initial bias values to zero biases(params,weights.size()) = 0; } } template void forward(const SUBNET& sub, resizable_tensor& output) { DLIB_CASSERT((long)num_inputs == sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k(), "The size of the input tensor to this fc layer doesn't match the size the fc layer was trained with."); output.set_size(sub.get_output().num_samples(), 
num_outputs); auto w = weights(params, 0); tt::gemm(0,output, 1,sub.get_output(),false, w,false); if (bias_mode == FC_HAS_BIAS) { auto b = biases(params, weights.size()); tt::add(1,output,1,b); } } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad) { // no point computing the parameter gradients if they won't be used. if (learning_rate_multiplier != 0) { // compute the gradient of the weight parameters. auto pw = weights(params_grad, 0); tt::gemm(0,pw, 1,sub.get_output(),true, gradient_input,false); if (bias_mode == FC_HAS_BIAS) { // compute the gradient of the bias parameters. auto pb = biases(params_grad, weights.size()); tt::assign_bias_gradient(pb, gradient_input); } } // compute the gradient for the data auto w = weights(params, 0); tt::gemm(1,sub.get_gradient_input(), 1,gradient_input,false, w,true); } alias_tensor_instance get_weights() { return weights(params, 0); } alias_tensor_const_instance get_weights() const { return weights(params, 0); } alias_tensor_instance get_biases() { static_assert(bias_mode == FC_HAS_BIAS, "This fc_ layer doesn't have a bias vector " "to be retrieved, as per template parameter 'bias_mode'."); return biases(params, weights.size()); } alias_tensor_const_instance get_biases() const { static_assert(bias_mode == FC_HAS_BIAS, "This fc_ layer doesn't have a bias vector " "to be retrieved, as per template parameter 'bias_mode'."); return biases(params, weights.size()); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const fc_& item, std::ostream& out) { serialize("fc_2", out); serialize(item.num_outputs, out); serialize(item.num_inputs, out); serialize(item.params, out); serialize(item.weights, out); serialize(item.biases, out); serialize((int)bias_mode, out); serialize(item.learning_rate_multiplier, out); serialize(item.weight_decay_multiplier, out); serialize(item.bias_learning_rate_multiplier, out); serialize(item.bias_weight_decay_multiplier, out); } friend void deserialize(fc_& item, std::istream& in) { std::string version; deserialize(version, in); if (version != "fc_2") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::fc_."); deserialize(item.num_outputs, in); deserialize(item.num_inputs, in); deserialize(item.params, in); deserialize(item.weights, in); deserialize(item.biases, in); int bmode = 0; deserialize(bmode, in); if (bias_mode != (fc_bias_mode)bmode) throw serialization_error("Wrong fc_bias_mode found while deserializing dlib::fc_"); deserialize(item.learning_rate_multiplier, in); deserialize(item.weight_decay_multiplier, in); deserialize(item.bias_learning_rate_multiplier, in); deserialize(item.bias_weight_decay_multiplier, in); } friend std::ostream& operator<<(std::ostream& out, const fc_& item) { if (bias_mode == FC_HAS_BIAS) { out << "fc\t (" << "num_outputs="<\n"; out << mat(item.params); out << "\n"; } else { out << "\n"; out << mat(item.params); out << "\n"; } } private: unsigned long num_outputs; unsigned long num_inputs; resizable_tensor params; alias_tensor weights, biases; double learning_rate_multiplier; double weight_decay_multiplier; double bias_learning_rate_multiplier; double bias_weight_decay_multiplier; }; template < unsigned long num_outputs, typename SUBNET > using fc = add_layer, SUBNET>; template < unsigned long num_outputs, typename SUBNET > using fc_no_bias = add_layer, SUBNET>; // 
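        // fc_ treats each input sample as a flat row vector of length
        // k()*nr()*nc() and multiplies it by a num_inputs x num_outputs weight
        // matrix (plus a bias row when bias_mode == FC_HAS_BIAS), i.e. it computes
        // output = input*W + b via the gemm calls above.  A rough usage sketch
        // (surrounding types are illustrative): fc<10, relu<fc<128, SUBNET>>> adds
        // a 128-unit hidden layer followed by a 10-output linear layer, and
        // fc_no_bias<N, SUBNET> does the same without the bias term.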
---------------------------------------------------------------------------------------- class dropout_ { public: explicit dropout_( float drop_rate_ = 0.5 ) : drop_rate(drop_rate_), rnd(std::rand()) { DLIB_CASSERT(0 <= drop_rate && drop_rate <= 1); } // We have to add a copy constructor and assignment operator because the rnd object // is non-copyable. dropout_( const dropout_& item ) : drop_rate(item.drop_rate), mask(item.mask), rnd(std::rand()) {} dropout_& operator= ( const dropout_& item ) { if (this == &item) return *this; drop_rate = item.drop_rate; mask = item.mask; return *this; } float get_drop_rate ( ) const { return drop_rate; } template void setup (const SUBNET& /*sub*/) { } void forward_inplace(const tensor& input, tensor& output) { // create a random mask and use it to filter the data mask.copy_size(input); rnd.fill_uniform(mask); tt::threshold(mask, drop_rate); tt::multiply(false, output, input, mask); } void backward_inplace( const tensor& gradient_input, tensor& data_grad, tensor& /*params_grad*/ ) { if (is_same_object(gradient_input, data_grad)) tt::multiply(false, data_grad, mask, gradient_input); else tt::multiply(true, data_grad, mask, gradient_input); } inline dpoint map_input_to_output (const dpoint& p) const { return p; } inline dpoint map_output_to_input (const dpoint& p) const { return p; } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const dropout_& item, std::ostream& out) { serialize("dropout_", out); serialize(item.drop_rate, out); serialize(item.mask, out); } friend void deserialize(dropout_& item, std::istream& in) { std::string version; deserialize(version, in); if (version != "dropout_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::dropout_."); deserialize(item.drop_rate, in); deserialize(item.mask, in); } void clean( ) { mask.clear(); } friend std::ostream& operator<<(std::ostream& out, const dropout_& item) { out << "dropout\t (" << "drop_rate="<\n"; } private: float drop_rate; resizable_tensor mask; tt::tensor_rand rnd; resizable_tensor params; // unused }; template using dropout = add_layer; // ---------------------------------------------------------------------------------------- class multiply_ { public: explicit multiply_( float val_ = 0.5 ) : val(val_) { } multiply_ ( const dropout_& item ) : val(1-item.get_drop_rate()) {} float get_multiply_value ( ) const { return val; } template void setup (const SUBNET& /*sub*/) { } void forward_inplace(const tensor& input, tensor& output) { tt::affine_transform(output, input, val); } inline dpoint map_input_to_output (const dpoint& p) const { return p; } inline dpoint map_output_to_input (const dpoint& p) const { return p; } void backward_inplace( const tensor& gradient_input, tensor& data_grad, tensor& /*params_grad*/ ) { if (is_same_object(gradient_input, data_grad)) tt::affine_transform(data_grad, gradient_input, val); else tt::affine_transform(data_grad, data_grad, gradient_input, 1, val); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const multiply_& item, std::ostream& out) { serialize("multiply_", out); serialize(item.val, out); } friend void deserialize(multiply_& item, std::istream& in) { std::string version; deserialize(version, in); if (version == "dropout_") { // Since we can build a multiply_ from a dropout_ we check if that's what // is in the stream and if so then just convert it right 
here. unserialize sin(version, in); dropout_ temp; deserialize(temp, sin); item = temp; return; } if (version != "multiply_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::multiply_."); deserialize(item.val, in); } friend std::ostream& operator<<(std::ostream& out, const multiply_& item) { out << "multiply (" << "val="<\n"; } private: float val; resizable_tensor params; // unused }; template using multiply = add_layer; // ---------------------------------------------------------------------------------------- class affine_ { public: affine_( ) : mode(FC_MODE) { } affine_( layer_mode mode_ ) : mode(mode_) { } template < layer_mode bnmode > affine_( const bn_& item ) { gamma = item.gamma; beta = item.beta; mode = bnmode; params.copy_size(item.params); auto g = gamma(params,0); auto b = beta(params,gamma.size()); resizable_tensor temp(item.params); auto sg = gamma(temp,0); auto sb = beta(temp,gamma.size()); g = pointwise_multiply(mat(sg), 1.0f/sqrt(mat(item.running_variances)+item.get_eps())); b = mat(sb) - pointwise_multiply(mat(g), mat(item.running_means)); } layer_mode get_mode() const { return mode; } inline dpoint map_input_to_output (const dpoint& p) const { return p; } inline dpoint map_output_to_input (const dpoint& p) const { return p; } template void setup (const SUBNET& sub) { if (mode == FC_MODE) { gamma = alias_tensor(1, sub.get_output().k(), sub.get_output().nr(), sub.get_output().nc()); } else { gamma = alias_tensor(1, sub.get_output().k()); } beta = gamma; params.set_size(gamma.size()+beta.size()); gamma(params,0) = 1; beta(params,gamma.size()) = 0; } void forward_inplace(const tensor& input, tensor& output) { auto g = gamma(params,0); auto b = beta(params,gamma.size()); if (mode == FC_MODE) tt::affine_transform(output, input, g, b); else tt::affine_transform_conv(output, input, g, b); } void backward_inplace( const tensor& gradient_input, tensor& data_grad, tensor& /*params_grad*/ ) { auto g = gamma(params,0); auto b = beta(params,gamma.size()); // We are computing the gradient of dot(gradient_input, computed_output*g + b) if (mode == FC_MODE) { if (is_same_object(gradient_input, data_grad)) tt::multiply(false, data_grad, gradient_input, g); else tt::multiply(true, data_grad, gradient_input, g); } else { if (is_same_object(gradient_input, data_grad)) tt::multiply_conv(false, data_grad, gradient_input, g); else tt::multiply_conv(true, data_grad, gradient_input, g); } } const tensor& get_layer_params() const { return empty_params; } tensor& get_layer_params() { return empty_params; } friend void serialize(const affine_& item, std::ostream& out) { serialize("affine_", out); serialize(item.params, out); serialize(item.gamma, out); serialize(item.beta, out); serialize((int)item.mode, out); } friend void deserialize(affine_& item, std::istream& in) { std::string version; deserialize(version, in); if (version == "bn_con2") { // Since we can build an affine_ from a bn_ we check if that's what is in // the stream and if so then just convert it right here. unserialize sin(version, in); bn_ temp; deserialize(temp, sin); item = temp; return; } else if (version == "bn_fc2") { // Since we can build an affine_ from a bn_ we check if that's what is in // the stream and if so then just convert it right here. 
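                // The bn_ conversion constructor above folds the batch norm
                // statistics into the fixed affine transform:
                //     g = gamma / sqrt(running_variances + eps)
                //     b = beta  - g * running_means
                // so an affine_ layer built from (or deserialized from) a bn_ layer
                // reproduces that layer's inference-mode behavior without keeping
                // any of the running statistics around.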
unserialize sin(version, in); bn_ temp; deserialize(temp, sin); item = temp; return; } if (version != "affine_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::affine_."); deserialize(item.params, in); deserialize(item.gamma, in); deserialize(item.beta, in); int mode; deserialize(mode, in); item.mode = (layer_mode)mode; } friend std::ostream& operator<<(std::ostream& out, const affine_& ) { out << "affine"; return out; } friend void to_xml(const affine_& item, std::ostream& out) { if (item.mode==CONV_MODE) out << "\n"; else out << "\n"; out << mat(item.params); if (item.mode==CONV_MODE) out << "\n"; else out << "\n"; } private: resizable_tensor params, empty_params; alias_tensor gamma, beta; layer_mode mode; }; template using affine = add_layer; // ---------------------------------------------------------------------------------------- template < template class tag > class add_prev_ { public: const static unsigned long id = tag_id::id; add_prev_() { } template void setup (const SUBNET& /*sub*/) { } template void forward(const SUBNET& sub, resizable_tensor& output) { auto&& t1 = sub.get_output(); auto&& t2 = layer(sub).get_output(); output.set_size(std::max(t1.num_samples(),t2.num_samples()), std::max(t1.k(),t2.k()), std::max(t1.nr(),t2.nr()), std::max(t1.nc(),t2.nc())); tt::add(output, t1, t2); } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/) { // The gradient just flows backwards to the two layers that forward() added // together. tt::add(sub.get_gradient_input(), sub.get_gradient_input(), gradient_input); tt::add(layer(sub).get_gradient_input(), layer(sub).get_gradient_input(), gradient_input); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } inline dpoint map_input_to_output (const dpoint& p) const { return p; } inline dpoint map_output_to_input (const dpoint& p) const { return p; } friend void serialize(const add_prev_& , std::ostream& out) { serialize("add_prev_", out); } friend void deserialize(add_prev_& , std::istream& in) { std::string version; deserialize(version, in); if (version != "add_prev_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::add_prev_."); } friend std::ostream& operator<<(std::ostream& out, const add_prev_& item) { out << "add_prev"<\n"; } private: resizable_tensor params; }; template < template class tag, typename SUBNET > using add_prev = add_layer, SUBNET>; template using add_prev1 = add_prev; template using add_prev2 = add_prev; template using add_prev3 = add_prev; template using add_prev4 = add_prev; template using add_prev5 = add_prev; template using add_prev6 = add_prev; template using add_prev7 = add_prev; template using add_prev8 = add_prev; template using add_prev9 = add_prev; template using add_prev10 = add_prev; using add_prev1_ = add_prev_; using add_prev2_ = add_prev_; using add_prev3_ = add_prev_; using add_prev4_ = add_prev_; using add_prev5_ = add_prev_; using add_prev6_ = add_prev_; using add_prev7_ = add_prev_; using add_prev8_ = add_prev_; using add_prev9_ = add_prev_; using add_prev10_ = add_prev_; // ---------------------------------------------------------------------------------------- template < template class tag > class mult_prev_ { public: const static unsigned long id = tag_id::id; mult_prev_() { } template void setup (const SUBNET& /*sub*/) { } template void forward(const SUBNET& sub, resizable_tensor& output) { auto&& t1 = sub.get_output(); 
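            // add_prev_ above is the building block for residual connections: it
            // adds the output of a tagged earlier layer to the current output,
            // zero padding the smaller tensor when the sizes differ.  A rough
            // sketch of a residual block in the usual dlib style (batch norm
            // omitted for brevity; the alias name is illustrative):
            //
            //     template <int N, typename SUBNET>
            //     using res_block = relu<add_prev1<
            //                           con<N,3,3,1,1,relu<con<N,3,3,1,1,
            //                           tag1<SUBNET>>>>>>;
            //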
auto&& t2 = layer(sub).get_output(); output.set_size(std::max(t1.num_samples(),t2.num_samples()), std::max(t1.k(),t2.k()), std::max(t1.nr(),t2.nr()), std::max(t1.nc(),t2.nc())); tt::multiply_zero_padded(false, output, t1, t2); } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/) { auto&& t1 = sub.get_output(); auto&& t2 = layer(sub).get_output(); // The gradient just flows backwards to the two layers that forward() // multiplied together. tt::multiply_zero_padded(true, sub.get_gradient_input(), t2, gradient_input); tt::multiply_zero_padded(true, layer(sub).get_gradient_input(), t1, gradient_input); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const mult_prev_& , std::ostream& out) { serialize("mult_prev_", out); } friend void deserialize(mult_prev_& , std::istream& in) { std::string version; deserialize(version, in); if (version != "mult_prev_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::mult_prev_."); } friend std::ostream& operator<<(std::ostream& out, const mult_prev_& item) { out << "mult_prev"<\n"; } private: resizable_tensor params; }; template < template class tag, typename SUBNET > using mult_prev = add_layer, SUBNET>; template using mult_prev1 = mult_prev; template using mult_prev2 = mult_prev; template using mult_prev3 = mult_prev; template using mult_prev4 = mult_prev; template using mult_prev5 = mult_prev; template using mult_prev6 = mult_prev; template using mult_prev7 = mult_prev; template using mult_prev8 = mult_prev; template using mult_prev9 = mult_prev; template using mult_prev10 = mult_prev; using mult_prev1_ = mult_prev_; using mult_prev2_ = mult_prev_; using mult_prev3_ = mult_prev_; using mult_prev4_ = mult_prev_; using mult_prev5_ = mult_prev_; using mult_prev6_ = mult_prev_; using mult_prev7_ = mult_prev_; using mult_prev8_ = mult_prev_; using mult_prev9_ = mult_prev_; using mult_prev10_ = mult_prev_; // ---------------------------------------------------------------------------------------- template < template class tag > class scale_ { public: const static unsigned long id = tag_id::id; scale_() { } template void setup (const SUBNET& /*sub*/) { } template void forward(const SUBNET& sub, resizable_tensor& output) { auto&& scales = sub.get_output(); auto&& src = layer(sub).get_output(); DLIB_CASSERT(scales.num_samples() == src.num_samples() && scales.k() == src.k() && scales.nr() == 1 && scales.nc() == 1, "scales.k(): " << scales.k() << "\nsrc.k(): " << src.k() ); output.copy_size(src); tt::scale_channels(false, output, src, scales); } template void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/) { auto&& scales = sub.get_output(); auto&& src = layer(sub).get_output(); // The gradient just flows backwards to the two layers that forward() // read from. 
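            // scale_ multiplies each channel of the tagged layer's output by a
            // per-channel scalar taken from this layer's immediate subnetwork,
            // whose output must therefore be num_samples x k x 1 x 1 (enforced by
            // the assert in forward()).  A typical use is squeeze-and-excitation
            // style channel gating, where the scale branch looks roughly like
            //     sig<fc<K, relu<fc<K/16, avg_pool_everything<tag1<SUBNET>>>>>>
            // feeding into scale1 (an illustrative sketch, not something defined
            // here).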
tt::scale_channels(true, layer(sub).get_gradient_input(), gradient_input, scales); if (reshape_src.num_samples() != src.num_samples()) { reshape_scales = alias_tensor(src.num_samples()*src.k()); reshape_src = alias_tensor(src.num_samples()*src.k(),src.nr()*src.nc()); } auto&& scales_grad = sub.get_gradient_input(); auto sgrad = reshape_scales(scales_grad); tt::dot_prods(true, sgrad, reshape_src(src), reshape_src(gradient_input)); } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const scale_& item, std::ostream& out) { serialize("scale_", out); serialize(item.reshape_scales, out); serialize(item.reshape_src, out); } friend void deserialize(scale_& item, std::istream& in) { std::string version; deserialize(version, in); if (version != "scale_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::scale_."); deserialize(item.reshape_scales, in); deserialize(item.reshape_src, in); } friend std::ostream& operator<<(std::ostream& out, const scale_& item) { out << "scale"<\n"; } private: alias_tensor reshape_scales; alias_tensor reshape_src; resizable_tensor params; }; template < template class tag, typename SUBNET > using scale = add_layer, SUBNET>; template using scale1 = scale; template using scale2 = scale; template using scale3 = scale; template using scale4 = scale; template using scale5 = scale; template using scale6 = scale; template using scale7 = scale; template using scale8 = scale; template using scale9 = scale; template using scale10 = scale; using scale1_ = scale_; using scale2_ = scale_; using scale3_ = scale_; using scale4_ = scale_; using scale5_ = scale_; using scale6_ = scale_; using scale7_ = scale_; using scale8_ = scale_; using scale9_ = scale_; using scale10_ = scale_; // ---------------------------------------------------------------------------------------- class relu_ { public: relu_() { } template void setup (const SUBNET& /*sub*/) { } void forward_inplace(const tensor& input, tensor& output) { tt::relu(output, input); } void backward_inplace( const tensor& computed_output, const tensor& gradient_input, tensor& data_grad, tensor& ) { tt::relu_gradient(data_grad, computed_output, gradient_input); } inline dpoint map_input_to_output (const dpoint& p) const { return p; } inline dpoint map_output_to_input (const dpoint& p) const { return p; } const tensor& get_layer_params() const { return params; } tensor& get_layer_params() { return params; } friend void serialize(const relu_& , std::ostream& out) { serialize("relu_", out); } friend void deserialize(relu_& , std::istream& in) { std::string version; deserialize(version, in); if (version != "relu_") throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::relu_."); } friend std::ostream& operator<<(std::ostream& out, const relu_& ) { out << "relu"; return out; } friend void to_xml(const relu_& /*item*/, std::ostream& out) { out << "\n"; } private: resizable_tensor params; }; template using relu = add_layer; // ---------------------------------------------------------------------------------------- class prelu_ { public: explicit prelu_( float initial_param_value_ = 0.25 ) : initial_param_value(initial_param_value_) { } float get_initial_param_value ( ) const { return initial_param_value; } template void setup (const SUBNET& /*sub*/) { params.set_size(1); params = initial_param_value; } template void forward( const SUBNET& sub, resizable_tensor& data_output ) { 
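            // prelu computes f(x) = x for x > 0 and f(x) = p*x otherwise, where p
            // is the single learnable parameter allocated in setup() above and
            // initialized to initial_param_value (0.25 by default).  Unlike relu_,
            // this layer therefore has a (one element) parameter tensor and a
            // params_grad to fill in during backward().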
// ----------------------------------------------------------------------------------------

    class relu_
    {
    public:
        relu_() {}

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::relu(output, input);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor&
        )
        {
            tt::relu_gradient(data_grad, computed_output, gradient_input);
        }

        inline dpoint map_input_to_output (const dpoint& p) const { return p; }
        inline dpoint map_output_to_input (const dpoint& p) const { return p; }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const relu_& , std::ostream& out) { serialize("relu_", out); }

        friend void deserialize(relu_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "relu_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::relu_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const relu_& ) { out << "relu"; return out; }

        friend void to_xml(const relu_& /*item*/, std::ostream& out) { out << "<relu/>\n"; }

    private:
        resizable_tensor params;
    };

    template <typename SUBNET> using relu = add_layer<relu_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class prelu_
    {
    public:
        explicit prelu_(
            float initial_param_value_ = 0.25
        ) : initial_param_value(initial_param_value_)
        {
        }

        float get_initial_param_value () const { return initial_param_value; }

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/)
        {
            params.set_size(1);
            params = initial_param_value;
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& data_output)
        {
            data_output.copy_size(sub.get_output());
            tt::prelu(data_output, sub.get_output(), params);
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad)
        {
            tt::prelu_gradient(sub.get_gradient_input(), sub.get_output(),
                gradient_input, params, params_grad);
        }

        inline dpoint map_input_to_output (const dpoint& p) const { return p; }
        inline dpoint map_output_to_input (const dpoint& p) const { return p; }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const prelu_& item, std::ostream& out)
        {
            serialize("prelu_", out);
            serialize(item.params, out);
            serialize(item.initial_param_value, out);
        }

        friend void deserialize(prelu_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "prelu_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::prelu_.");
            deserialize(item.params, in);
            deserialize(item.initial_param_value, in);
        }

        friend std::ostream& operator<<(std::ostream& out, const prelu_& item)
        {
            out << "prelu\t ("
                << "initial_param_value="<<item.initial_param_value
                << ")";
            return out;
        }

        friend void to_xml(const prelu_& item, std::ostream& out)
        {
            out << "<prelu initial_param_value='"<<item.initial_param_value<<"'>\n";
            out << mat(item.params);
            out << "</prelu>\n";
        }

    private:
        resizable_tensor params;
        float initial_param_value;
    };

    template <typename SUBNET> using prelu = add_layer<prelu_, SUBNET>;
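    // Illustrative sketch (not part of the library): prelu's slope is a learned scalar
    // whose starting value comes from the prelu_ constructor.  Assuming the rest of
    // dlib/dnn.h is available, a hypothetical network could start the slope at 0.1
    // instead of the default 0.25 by handing a prelu_ object to the network's
    // constructor, which forwards it to the matching layer:
    //
    //    using example_net = loss_multiclass_log<fc<10, prelu<fc<50, input<matrix<float>>>>>>;
    //    example_net net(prelu_(0.1f));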
// ----------------------------------------------------------------------------------------

    class sig_
    {
    public:
        sig_() {}

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::sigmoid(output, input);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor&
        )
        {
            tt::sigmoid_gradient(data_grad, computed_output, gradient_input);
        }

        inline dpoint map_input_to_output (const dpoint& p) const { return p; }
        inline dpoint map_output_to_input (const dpoint& p) const { return p; }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const sig_& , std::ostream& out) { serialize("sig_", out); }

        friend void deserialize(sig_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "sig_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::sig_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const sig_& ) { out << "sig"; return out; }

        friend void to_xml(const sig_& /*item*/, std::ostream& out) { out << "<sig/>\n"; }

    private:
        resizable_tensor params;
    };

    template <typename SUBNET> using sig = add_layer<sig_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class htan_
    {
    public:
        htan_() {}

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        inline dpoint map_input_to_output (const dpoint& p) const { return p; }
        inline dpoint map_output_to_input (const dpoint& p) const { return p; }

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::tanh(output, input);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor&
        )
        {
            tt::tanh_gradient(data_grad, computed_output, gradient_input);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const htan_& , std::ostream& out) { serialize("htan_", out); }

        friend void deserialize(htan_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "htan_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::htan_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const htan_& ) { out << "htan"; return out; }

        friend void to_xml(const htan_& /*item*/, std::ostream& out) { out << "<htan/>\n"; }

    private:
        resizable_tensor params;
    };

    template <typename SUBNET> using htan = add_layer<htan_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class softmax_
    {
    public:
        softmax_() {}

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::softmax(output, input);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor&
        )
        {
            tt::softmax_gradient(data_grad, computed_output, gradient_input);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const softmax_& , std::ostream& out) { serialize("softmax_", out); }

        friend void deserialize(softmax_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "softmax_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const softmax_& ) { out << "softmax"; return out; }

        friend void to_xml(const softmax_& /*item*/, std::ostream& out) { out << "<softmax/>\n"; }

    private:
        resizable_tensor params;
    };

    template <typename SUBNET> using softmax = add_layer<softmax_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class softmax_all_
    {
    public:
        softmax_all_() {}

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::softmax_all(output, input);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor&
        )
        {
            tt::softmax_all_gradient(data_grad, computed_output, gradient_input);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const softmax_all_& , std::ostream& out) { serialize("softmax_all_", out); }

        friend void deserialize(softmax_all_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "softmax_all_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::softmax_all_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const softmax_all_& ) { out << "softmax_all"; return out; }

        friend void to_xml(const softmax_all_& /*item*/, std::ostream& out) { out << "<softmax_all/>\n"; }

    private:
        resizable_tensor params;
    };

    template <typename SUBNET> using softmax_all = add_layer<softmax_all_, SUBNET>;

// ----------------------------------------------------------------------------------------

    namespace impl
    {
        template <template<typename> class TAG_TYPE, template<typename> class... TAG_TYPES>
        struct concat_helper_impl{

            constexpr static size_t tag_count() {return 1 + concat_helper_impl<TAG_TYPES...>::tag_count();}

            static void list_tags(std::ostream& out)
            {
                out << tag_id<TAG_TYPE>::id << (tag_count() > 1 ? "," : "");
                concat_helper_impl<TAG_TYPES...>::list_tags(out);
            }

            template <typename SUBNET>
            static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                concat_helper_impl<TAG_TYPES...>::resize_out(out, sub, sum_k + t.k());
            }

            template <typename SUBNET>
            static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                tt::copy_tensor(false, out, k_offset, t, 0, t.k());
                k_offset += t.k();
                concat_helper_impl<TAG_TYPES...>::concat(out, sub, k_offset);
            }

            template <typename SUBNET>
            static void split(const tensor& input, SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
                tt::copy_tensor(true, t, 0, input, k_offset, t.k());
                k_offset += t.k();
                concat_helper_impl<TAG_TYPES...>::split(input, sub, k_offset);
            }
        };

        template <template<typename> class TAG_TYPE>
        struct concat_helper_impl<TAG_TYPE>{
            constexpr static size_t tag_count() {return 1;}

            static void list_tags(std::ostream& out)
            {
                out << tag_id<TAG_TYPE>::id;
            }

            template <typename SUBNET>
            static void resize_out(resizable_tensor& out, const SUBNET& sub, long sum_k)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                out.set_size(t.num_samples(), t.k() + sum_k, t.nr(), t.nc());
            }

            template <typename SUBNET>
            static void concat(tensor& out, const SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_output();
                tt::copy_tensor(false, out, k_offset, t, 0, t.k());
            }

            template <typename SUBNET>
            static void split(const tensor& input, SUBNET& sub, size_t k_offset)
            {
                auto& t = layer<TAG_TYPE>(sub).get_gradient_input();
                tt::copy_tensor(true, t, 0, input, k_offset, t.k());
            }
        };
    }

    // concat layer
    template<
        template<typename> class... TAG_TYPES
        >
    class concat_
    {
        static void list_tags(std::ostream& out) { impl::concat_helper_impl<TAG_TYPES...>::list_tags(out); };

    public:
        constexpr static size_t tag_count() {return impl::concat_helper_impl<TAG_TYPES...>::tag_count();};

        template <typename SUBNET>
        void setup (const SUBNET&)
        {
            // do nothing
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            // the total depth of the result is the sum of depths from all tags
            impl::concat_helper_impl<TAG_TYPES...>::resize_out(output, sub, 0);

            // copy the output from each tag into a different part of the result
            impl::concat_helper_impl<TAG_TYPES...>::concat(output, sub, 0);
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor&)
        {
            // The gradient is split into parts, one for each tag layer.
            impl::concat_helper_impl<TAG_TYPES...>::split(gradient_input, sub, 0);
        }

        dpoint map_input_to_output(dpoint p) const { return p; }
        dpoint map_output_to_input(dpoint p) const { return p; }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const concat_& item, std::ostream& out)
        {
            serialize("concat_", out);
            size_t count = tag_count();
            serialize(count, out);
        }

        friend void deserialize(concat_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "concat_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::concat_.");
            size_t count_tags;
            deserialize(count_tags, in);
            if (count_tags != tag_count())
                throw serialization_error("Invalid count of tags "+ std::to_string(count_tags) +
                                           ", expecting " + std::to_string(tag_count()) +
                                           " found while deserializing dlib::concat_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const concat_& item)
        {
            out << "concat\t (";
            list_tags(out);
            out << ")";
            return out;
        }

        friend void to_xml(const concat_& item, std::ostream& out)
        {
            out << "<concat tags='";
            list_tags(out);
            out << "'/>\n";
        }

    private:
        resizable_tensor params; // unused
    };

    // concat layer definitions
    template <template<typename> class TAG1,
              template<typename> class TAG2,
              typename SUBNET>
    using concat2 = add_layer<concat_<TAG1, TAG2>, SUBNET>;

    template <template<typename> class TAG1,
              template<typename> class TAG2,
              template<typename> class TAG3,
              typename SUBNET>
    using concat3 = add_layer<concat_<TAG1, TAG2, TAG3>, SUBNET>;
    template <template<typename> class TAG1,
              template<typename> class TAG2,
              template<typename> class TAG3,
              template<typename> class TAG4,
              typename SUBNET>
    using concat4 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4>, SUBNET>;

    template <template<typename> class TAG1,
              template<typename> class TAG2,
              template<typename> class TAG3,
              template<typename> class TAG4,
              template<typename> class TAG5,
              typename SUBNET>
    using concat5 = add_layer<concat_<TAG1, TAG2, TAG3, TAG4, TAG5>, SUBNET>;

    // The inception layer uses tags internally.  If the user also uses tags, conflicts
    // are possible, so to exclude them here are new tags used only by the inception
    // templates.
    template <typename SUBNET> using itag0  = add_tag_layer< 1000 + 0, SUBNET>;
    template <typename SUBNET> using itag1  = add_tag_layer< 1000 + 1, SUBNET>;
    template <typename SUBNET> using itag2  = add_tag_layer< 1000 + 2, SUBNET>;
    template <typename SUBNET> using itag3  = add_tag_layer< 1000 + 3, SUBNET>;
    template <typename SUBNET> using itag4  = add_tag_layer< 1000 + 4, SUBNET>;
    template <typename SUBNET> using itag5  = add_tag_layer< 1000 + 5, SUBNET>;
    // skip to the inception input
    template <typename SUBNET> using iskip  = add_skip_layer< itag0, SUBNET>;

    // here are some templates to be used for creating inception layer groups
    template <template<typename>class B1,
              template<typename>class B2,
              typename SUBNET>
    using inception2 = concat2<itag1, itag2,
                itag1<B1<iskip< itag2<B2< itag0<SUBNET>>>>>>>;

    template <template<typename>class B1,
              template<typename>class B2,
              template<typename>class B3,
              typename SUBNET>
    using inception3 = concat3<itag1, itag2, itag3,
                itag1<B1<iskip< itag2<B2<iskip< itag3<B3< itag0<SUBNET>>>>>>>>>>;

    template <template<typename>class B1,
              template<typename>class B2,
              template<typename>class B3,
              template<typename>class B4,
              typename SUBNET>
    using inception4 = concat4<itag1, itag2, itag3, itag4,
                itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4< itag0<SUBNET>>>>>>>>>>>>>;

    template <template<typename>class B1,
              template<typename>class B2,
              template<typename>class B3,
              template<typename>class B4,
              template<typename>class B5,
              typename SUBNET>
    using inception5 = concat5<itag1, itag2, itag3, itag4, itag5,
                itag1<B1<iskip< itag2<B2<iskip< itag3<B3<iskip< itag4<B4<iskip< itag5<B5< itag0<SUBNET>>>>>>>>>>>>>>>>;
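    // Illustrative sketch (not part of the library): an inception group is used by first
    // defining one template per branch and then composing them.  The branch shapes below
    // are hypothetical, following the pattern of dlib's dnn_inception_ex.cpp example:
    //
    //    template <typename SUBNET> using block_a1 = relu<con<10,1,1,1,1,SUBNET>>;
    //    template <typename SUBNET> using block_a2 = relu<con<10,3,3,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
    //    template <typename SUBNET> using block_a3 = relu<con<10,5,5,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
    //    template <typename SUBNET> using block_a4 = relu<con<10,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
    //    template <typename SUBNET> using incept_a = inception4<block_a1,block_a2,block_a3,block_a4, SUBNET>;
    //
    // Each branch must preserve the spatial dimensions of its input so the branch outputs
    // can be concatenated along the channel dimension.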
// ----------------------------------------------------------------------------------------

// ----------------------------------------------------------------------------------------

    const double DEFAULT_L2_NORM_EPS = 1e-5;

    class l2normalize_
    {
    public:
        explicit l2normalize_(
            double eps_ = DEFAULT_L2_NORM_EPS
        ) : eps(eps_)
        {
        }

        double get_eps() const { return eps; }

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/) {}

        void forward_inplace(const tensor& input, tensor& output)
        {
            tt::inverse_norms(norm, input, eps);
            tt::scale_rows(output, input, norm);
        }

        void backward_inplace(
            const tensor& computed_output,
            const tensor& gradient_input,
            tensor& data_grad,
            tensor& /*params_grad*/
        )
        {
            if (is_same_object(gradient_input, data_grad))
            {
                tt::dot_prods(temp, gradient_input, computed_output);
                tt::scale_rows2(0, data_grad, gradient_input, computed_output, temp, norm);
            }
            else
            {
                tt::dot_prods(temp, gradient_input, computed_output);
                tt::scale_rows2(1, data_grad, gradient_input, computed_output, temp, norm);
            }
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const l2normalize_& item, std::ostream& out)
        {
            serialize("l2normalize_", out);
            serialize(item.eps, out);
        }

        friend void deserialize(l2normalize_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "l2normalize_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::l2normalize_.");
            deserialize(item.eps, in);
        }

        friend std::ostream& operator<<(std::ostream& out, const l2normalize_& item)
        {
            out << "l2normalize";
            out << " eps="<<item.eps;
            return out;
        }

        friend void to_xml(const l2normalize_& item, std::ostream& out)
        {
            out << "<l2normalize eps='"<<item.eps<<"'/>\n";
        }

    private:
        double eps;

        resizable_tensor params; // unused
        // Here only to avoid reallocation and as a cache between forward/backward
        // functions.
        resizable_tensor norm;
        resizable_tensor temp;
    };

    template <typename SUBNET> using l2normalize = add_layer<l2normalize_, SUBNET>;

// ----------------------------------------------------------------------------------------

    template <
        long _offset,
        long _k,
        long _nr,
        long _nc
        >
    class extract_
    {
        static_assert(_offset >= 0, "The offset must be >= 0.");
        static_assert(_k > 0,  "The number of channels must be > 0.");
        static_assert(_nr > 0, "The number of rows must be > 0.");
        static_assert(_nc > 0, "The number of columns must be > 0.");
    public:
        extract_() {}

        template <typename SUBNET>
        void setup (const SUBNET& sub)
        {
            DLIB_CASSERT((long)sub.get_output().size() >= sub.get_output().num_samples()*(_offset+_k*_nr*_nc),
                "The tensor we are trying to extract from the input tensor is too big to fit into the input tensor.");

            aout = alias_tensor(sub.get_output().num_samples(), _k*_nr*_nc);
            ain = alias_tensor(sub.get_output().num_samples(), sub.get_output().size()/sub.get_output().num_samples());
        }

        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            if (aout.num_samples() != sub.get_output().num_samples())
            {
                aout = alias_tensor(sub.get_output().num_samples(), _k*_nr*_nc);
                ain = alias_tensor(sub.get_output().num_samples(), sub.get_output().size()/sub.get_output().num_samples());
            }

            output.set_size(sub.get_output().num_samples(), _k, _nr, _nc);
            auto out = aout(output,0);
            auto in = ain(sub.get_output(),0);
            tt::copy_tensor(false, out, 0, in, _offset, _k*_nr*_nc);
        }

        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
        {
            auto out = ain(sub.get_gradient_input(),0);
            auto in = aout(gradient_input,0);
            tt::copy_tensor(true, out, _offset, in, 0, _k*_nr*_nc);
        }

        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        friend void serialize(const extract_& item, std::ostream& out)
        {
            serialize("extract_", out);
            serialize(_offset, out);
            serialize(_k, out);
            serialize(_nr, out);
            serialize(_nc, out);
        }

        friend void deserialize(extract_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "extract_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::extract_.");

            long offset;
            long k;
            long nr;
            long nc;
            deserialize(offset, in);
            deserialize(k, in);
            deserialize(nr, in);
            deserialize(nc, in);

            if (offset != _offset) throw serialization_error("Wrong offset found while deserializing dlib::extract_");
            if (k != _k) throw serialization_error("Wrong k found while deserializing dlib::extract_");
            if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::extract_");
            if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::extract_");
        }

        friend std::ostream& operator<<(std::ostream& out, const extract_& item)
        {
            out << "extract\t ("
                << "offset="<<_offset
                << ", k="<<_k
                << ", nr="<<_nr
                << ", nc="<<_nc
                << ")";
            return out;
        }

        friend void to_xml(const extract_& item, std::ostream& out)
        {
            out << "<extract offset='"<<_offset<<"' k='"<<_k<<"' nr='"<<_nr<<"' nc='"<<_nc<<"'/>\n";
        }

    private:
        alias_tensor aout, ain;

        resizable_tensor params; // unused
    };

    template <
        long offset,
        long k,
        long nr,
        long nc,
        typename SUBNET
        >
    using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>;

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_LAYERS_H_