diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 11:19:16 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:53:24 +0000 |
commit | b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 (patch) | |
tree | d4d31289c39fc00da064a825df13a0b98ce95b10 /ml/dlib/tools/convert_dlib_nets_to_caffe | |
parent | Adding upstream version 1.44.3. (diff) | |
download | netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.tar.xz netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.zip |
Adding upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/tools/convert_dlib_nets_to_caffe')
3 files changed, 0 insertions, 894 deletions
diff --git a/ml/dlib/tools/convert_dlib_nets_to_caffe/CMakeLists.txt b/ml/dlib/tools/convert_dlib_nets_to_caffe/CMakeLists.txt deleted file mode 100644 index f9518df21..000000000 --- a/ml/dlib/tools/convert_dlib_nets_to_caffe/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# -# This is a CMake makefile. You can find the cmake utility and -# information about it at http://www.cmake.org -# - -cmake_minimum_required(VERSION 2.8.12) - -set (target_name dtoc) - -PROJECT(${target_name}) - -add_subdirectory(../../dlib dlib_build) - -add_executable(${target_name} - main.cpp - ) - -target_link_libraries(${target_name} dlib::dlib ) - - -INSTALL(TARGETS ${target_name} - RUNTIME DESTINATION bin - ) - - diff --git a/ml/dlib/tools/convert_dlib_nets_to_caffe/main.cpp b/ml/dlib/tools/convert_dlib_nets_to_caffe/main.cpp deleted file mode 100644 index f5cc19748..000000000 --- a/ml/dlib/tools/convert_dlib_nets_to_caffe/main.cpp +++ /dev/null @@ -1,792 +0,0 @@ - -#include <dlib/xml_parser.h> -#include <dlib/matrix.h> -#include <fstream> -#include <vector> -#include <stack> -#include <set> -#include <dlib/string.h> - -using namespace std; -using namespace dlib; - - -// ---------------------------------------------------------------------------------------- - -// Only these computational layers have parameters -const std::set<string> comp_tags_with_params = {"fc", "fc_no_bias", "con", "affine_con", "affine_fc", "affine", "prelu"}; - -struct layer -{ - string type; // comp, loss, or input - int idx; - - matrix<long,4,1> output_tensor_shape; // (N,K,NR,NC) - - string detail_name; // The name of the tag inside the layer tag. e.g. fc, con, max_pool, input_rgb_image. - std::map<string,double> attributes; - matrix<float> params; - long tag_id = -1; // If this isn't -1 then it means this layer was tagged, e.g. wrapped with tag2<> giving tag_id==2 - long skip_id = -1; // If this isn't -1 then it means this layer draws its inputs from - // the most recent layer with tag_id==skip_id rather than its immediate predecessor. - - double attribute (const string& key) const - { - auto i = attributes.find(key); - if (i != attributes.end()) - return i->second; - else - throw dlib::error("Layer doesn't have the requested attribute '" + key + "'."); - } - - string caffe_layer_name() const - { - if (type == "input") - return "data"; - else - return detail_name+to_string(idx); - } -}; - -// ---------------------------------------------------------------------------------------- - -std::vector<layer> parse_dlib_xml( - const matrix<long,4,1>& input_tensor_shape, - const string& xml_filename -); - -// ---------------------------------------------------------------------------------------- - -template <typename iterator> -const layer& find_layer ( - iterator i, - long tag_id -) -/*! - requires - - i is a reverse iterator pointing to a layer in the list of layers produced by parse_dlib_xml(). - - i is not an input layer. - ensures - - if (tag_id == -1) then - - returns the previous layer (i.e. closer to the input) to layer i. - - else - - returns the previous layer (i.e. closer to the input) to layer i with the - given tag_id. -!*/ -{ - if (tag_id == -1) - { - return *(i-1); - } - else - { - while(true) - { - i--; - // if we hit the end of the network before we found what we were looking for - if (i->tag_id == tag_id) - return *i; - if (i->type == "input") - throw dlib::error("Network definition is bad, a layer wanted to skip back to a non-existing layer."); - } - } -} - -template <typename iterator> -const layer& find_input_layer (iterator i) { return find_layer(i, i->skip_id); } - -template <typename iterator> -string find_layer_caffe_name ( - iterator i, - long tag_id -) -{ - return find_layer(i,tag_id).caffe_layer_name(); -} - -template <typename iterator> -string find_input_layer_caffe_name (iterator i) { return find_input_layer(i).caffe_layer_name(); } - -// ---------------------------------------------------------------------------------------- - -template <typename iterator> -void compute_caffe_padding_size_for_pooling_layer( - const iterator& i, - long& pad_x, - long& pad_y -) -/*! - requires - - i is a reverse iterator pointing to a layer in the list of layers produced by parse_dlib_xml(). - - i is not an input layer. - ensures - - Caffe is funny about how it computes the output sizes from pooling layers. - Rather than using the normal formula for output row/column sizes used by all the - other layers (and what dlib uses everywhere), - floor((bottom_size + 2*pad - kernel_size) / stride) + 1 - it instead uses: - ceil((bottom_size + 2*pad - kernel_size) / stride) + 1 - - These are the same except when the stride!=1. In that case we need to figure out - how to change the padding value so that the output size of the caffe padding - layer will match the output size of the dlib padding layer. That is what this - function does. -!*/ -{ - const long dlib_output_nr = i->output_tensor_shape(2); - const long dlib_output_nc = i->output_tensor_shape(3); - const long bottom_nr = find_input_layer(i).output_tensor_shape(2); - const long bottom_nc = find_input_layer(i).output_tensor_shape(3); - const long padding_x = (long)i->attribute("padding_x"); - const long padding_y = (long)i->attribute("padding_y"); - const long stride_x = (long)i->attribute("stride_x"); - const long stride_y = (long)i->attribute("stride_y"); - long kernel_w = i->attribute("nc"); - long kernel_h = i->attribute("nr"); - - if (kernel_w == 0) - kernel_w = bottom_nc; - if (kernel_h == 0) - kernel_h = bottom_nr; - - - // The correct padding for caffe could be anything in the range [0,padding_x]. So - // check what gives the correct output size and use that. - for (pad_x = 0; pad_x <= padding_x; ++pad_x) - { - long caffe_out_size = ceil((bottom_nc + 2.0*pad_x - kernel_w)/(double)stride_x) + 1; - if (caffe_out_size == dlib_output_nc) - break; - } - if (pad_x == padding_x+1) - { - std::ostringstream sout; - sout << "No conversion between dlib pooling layer parameters and caffe pooling layer parameters found for layer " << to_string(i->idx) << endl; - sout << "dlib_output_nc: " << dlib_output_nc << endl; - sout << "bottom_nc: " << bottom_nc << endl; - sout << "padding_x: " << padding_x << endl; - sout << "stride_x: " << stride_x << endl; - sout << "kernel_w: " << kernel_w << endl; - sout << "pad_x: " << pad_x << endl; - throw dlib::error(sout.str()); - } - - for (pad_y = 0; pad_y <= padding_y; ++pad_y) - { - long caffe_out_size = ceil((bottom_nr + 2.0*pad_y - kernel_h)/(double)stride_y) + 1; - if (caffe_out_size == dlib_output_nr) - break; - } - if (pad_y == padding_y+1) - { - std::ostringstream sout; - sout << "No conversion between dlib pooling layer parameters and caffe pooling layer parameters found for layer " << to_string(i->idx) << endl; - sout << "dlib_output_nr: " << dlib_output_nr << endl; - sout << "bottom_nr: " << bottom_nr << endl; - sout << "padding_y: " << padding_y << endl; - sout << "stride_y: " << stride_y << endl; - sout << "kernel_h: " << kernel_h << endl; - sout << "pad_y: " << pad_y << endl; - throw dlib::error(sout.str()); - } -} - -// ---------------------------------------------------------------------------------------- - -void convert_dlib_xml_to_caffe_python_code( - const string& xml_filename, - const long N, - const long K, - const long NR, - const long NC -) -{ - const string out_filename = left_substr(xml_filename,".") + "_dlib_to_caffe_model.py"; - const string out_weights_filename = left_substr(xml_filename,".") + "_dlib_to_caffe_model.weights"; - cout << "Writing python part of model to " << out_filename << endl; - cout << "Writing weights part of model to " << out_weights_filename << endl; - ofstream fout(out_filename); - fout.precision(9); - const auto layers = parse_dlib_xml({N,K,NR,NC}, xml_filename); - - - fout << "#\n"; - fout << "# !!! This file was automatically generated by dlib's tools/convert_dlib_nets_to_caffe utility. !!!\n"; - fout << "# !!! It contains all the information from a dlib DNN network and lets you save it as a cafe model. !!!\n"; - fout << "#\n"; - fout << "import caffe " << endl; - fout << "from caffe import layers as L, params as P" << endl; - fout << "import numpy as np" << endl; - - // dlib nets don't commit to a batch size, so just use 1 as the default - fout << "\n# Input tensor dimensions" << endl; - fout << "input_batch_size = " << N << ";" << endl; - if (layers.back().detail_name == "input_rgb_image") - { - fout << "input_num_channels = 3;" << endl; - fout << "input_num_rows = "<<NR<<";" << endl; - fout << "input_num_cols = "<<NC<<";" << endl; - if (K != 3) - throw dlib::error("The dlib model requires input tensors with NUM_CHANNELS==3, but the dtoc command line specified NUM_CHANNELS=="+to_string(K)); - } - else if (layers.back().detail_name == "input_rgb_image_sized") - { - fout << "input_num_channels = 3;" << endl; - fout << "input_num_rows = " << layers.back().attribute("nr") << ";" << endl; - fout << "input_num_cols = " << layers.back().attribute("nc") << ";" << endl; - if (NR != layers.back().attribute("nr")) - throw dlib::error("The dlib model requires input tensors with NUM_ROWS=="+to_string((long)layers.back().attribute("nr"))+", but the dtoc command line specified NUM_ROWS=="+to_string(NR)); - if (NC != layers.back().attribute("nc")) - throw dlib::error("The dlib model requires input tensors with NUM_COLUMNS=="+to_string((long)layers.back().attribute("nc"))+", but the dtoc command line specified NUM_COLUMNS=="+to_string(NC)); - if (K != 3) - throw dlib::error("The dlib model requires input tensors with NUM_CHANNELS==3, but the dtoc command line specified NUM_CHANNELS=="+to_string(K)); - } - else if (layers.back().detail_name == "input") - { - fout << "input_num_channels = 1;" << endl; - fout << "input_num_rows = "<<NR<<";" << endl; - fout << "input_num_cols = "<<NC<<";" << endl; - if (K != 1) - throw dlib::error("The dlib model requires input tensors with NUM_CHANNELS==1, but the dtoc command line specified NUM_CHANNELS=="+to_string(K)); - } - else - { - throw dlib::error("No known transformation from dlib's " + layers.back().detail_name + " layer to caffe."); - } - fout << endl; - fout << "# Call this function to write the dlib DNN model out to file as a pair of caffe\n"; - fout << "# definition and weight files. You can then use the network by loading it with\n"; - fout << "# this statement: \n"; - fout << "# net = caffe.Net(def_file, weights_file, caffe.TEST);\n"; - fout << "#\n"; - fout << "def save_as_caffe_model(def_file, weights_file):\n"; - fout << " with open(def_file, 'w') as f: f.write(str(make_netspec()));\n"; - fout << " net = caffe.Net(def_file, caffe.TEST);\n"; - fout << " set_network_weights(net);\n"; - fout << " net.save(weights_file);\n\n"; - fout << "###############################################################################\n"; - fout << "# EVERYTHING BELOW HERE DEFINES THE DLIB MODEL PARAMETERS #\n"; - fout << "###############################################################################\n\n\n"; - - - // ----------------------------------------------------------------------------------- - // The next block of code outputs python code that defines the network architecture. - // ----------------------------------------------------------------------------------- - - fout << "def make_netspec():" << endl; - fout << " # For reference, the only \"documentation\" about caffe layer parameters seems to be this page:\n"; - fout << " # https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto\n" << endl; - fout << " n = caffe.NetSpec(); " << endl; - fout << " n.data,n.label = L.MemoryData(batch_size=input_batch_size, channels=input_num_channels, height=input_num_rows, width=input_num_cols, ntop=2)" << endl; - // iterate the layers starting with the input layer - for (auto i = layers.rbegin(); i != layers.rend(); ++i) - { - // skip input and loss layers - if (i->type == "loss" || i->type == "input") - continue; - - - if (i->detail_name == "con") - { - fout << " n." << i->caffe_layer_name() << " = L.Convolution(n." << find_input_layer_caffe_name(i); - fout << ", num_output=" << i->attribute("num_filters"); - fout << ", kernel_w=" << i->attribute("nc"); - fout << ", kernel_h=" << i->attribute("nr"); - fout << ", stride_w=" << i->attribute("stride_x"); - fout << ", stride_h=" << i->attribute("stride_y"); - fout << ", pad_w=" << i->attribute("padding_x"); - fout << ", pad_h=" << i->attribute("padding_y"); - fout << ");\n"; - } - else if (i->detail_name == "relu") - { - fout << " n." << i->caffe_layer_name() << " = L.ReLU(n." << find_input_layer_caffe_name(i); - fout << ");\n"; - } - else if (i->detail_name == "sig") - { - fout << " n." << i->caffe_layer_name() << " = L.Sigmoid(n." << find_input_layer_caffe_name(i); - fout << ");\n"; - } - else if (i->detail_name == "prelu") - { - fout << " n." << i->caffe_layer_name() << " = L.PReLU(n." << find_input_layer_caffe_name(i); - fout << ", channel_shared=True"; - fout << ");\n"; - } - else if (i->detail_name == "max_pool") - { - fout << " n." << i->caffe_layer_name() << " = L.Pooling(n." << find_input_layer_caffe_name(i); - fout << ", pool=P.Pooling.MAX"; - if (i->attribute("nc")==0) - { - fout << ", global_pooling=True"; - } - else - { - fout << ", kernel_w=" << i->attribute("nc"); - fout << ", kernel_h=" << i->attribute("nr"); - } - - fout << ", stride_w=" << i->attribute("stride_x"); - fout << ", stride_h=" << i->attribute("stride_y"); - long pad_x, pad_y; - compute_caffe_padding_size_for_pooling_layer(i, pad_x, pad_y); - fout << ", pad_w=" << pad_x; - fout << ", pad_h=" << pad_y; - fout << ");\n"; - } - else if (i->detail_name == "avg_pool") - { - fout << " n." << i->caffe_layer_name() << " = L.Pooling(n." << find_input_layer_caffe_name(i); - fout << ", pool=P.Pooling.AVE"; - if (i->attribute("nc")==0) - { - fout << ", global_pooling=True"; - } - else - { - fout << ", kernel_w=" << i->attribute("nc"); - fout << ", kernel_h=" << i->attribute("nr"); - } - if (i->attribute("padding_x") != 0 || i->attribute("padding_y") != 0) - { - throw dlib::error("dlib and caffe implement pooling with non-zero padding differently, so you can't convert a " - "network with such pooling layers."); - } - - fout << ", stride_w=" << i->attribute("stride_x"); - fout << ", stride_h=" << i->attribute("stride_y"); - long pad_x, pad_y; - compute_caffe_padding_size_for_pooling_layer(i, pad_x, pad_y); - fout << ", pad_w=" << pad_x; - fout << ", pad_h=" << pad_y; - fout << ");\n"; - } - else if (i->detail_name == "fc") - { - fout << " n." << i->caffe_layer_name() << " = L.InnerProduct(n." << find_input_layer_caffe_name(i); - fout << ", num_output=" << i->attribute("num_outputs"); - fout << ", bias_term=True"; - fout << ");\n"; - } - else if (i->detail_name == "fc_no_bias") - { - fout << " n." << i->caffe_layer_name() << " = L.InnerProduct(n." << find_input_layer_caffe_name(i); - fout << ", num_output=" << i->attribute("num_outputs"); - fout << ", bias_term=False"; - fout << ");\n"; - } - else if (i->detail_name == "bn_con" || i->detail_name == "bn_fc") - { - throw dlib::error("Conversion from dlib's batch norm layers to caffe's isn't supported. Instead, " - "you should put your dlib network into 'test mode' by switching batch norm layers to affine layers. " - "Then you can convert that 'test mode' network to caffe."); - } - else if (i->detail_name == "affine_con") - { - fout << " n." << i->caffe_layer_name() << " = L.Scale(n." << find_input_layer_caffe_name(i); - fout << ", bias_term=True"; - fout << ");\n"; - } - else if (i->detail_name == "affine_fc") - { - fout << " n." << i->caffe_layer_name() << " = L.Scale(n." << find_input_layer_caffe_name(i); - fout << ", bias_term=True"; - fout << ");\n"; - } - else if (i->detail_name == "add_prev") - { - auto in_shape1 = find_input_layer(i).output_tensor_shape; - auto in_shape2 = find_layer(i,i->attribute("tag")).output_tensor_shape; - if (in_shape1 != in_shape2) - { - // if only the number of channels differs then we will use a dummy layer to - // pad with zeros. But otherwise we will throw an error. - if (in_shape1(0) == in_shape2(0) && - in_shape1(2) == in_shape2(2) && - in_shape1(3) == in_shape2(3)) - { - fout << " n." << i->caffe_layer_name() << "_zeropad = L.DummyData(num=" << in_shape1(0); - fout << ", channels="<<std::abs(in_shape1(1)-in_shape2(1)); - fout << ", height="<<in_shape1(2); - fout << ", width="<<in_shape1(3); - fout << ");\n"; - - string smaller_layer = find_input_layer_caffe_name(i); - string bigger_layer = find_layer_caffe_name(i, i->attribute("tag")); - if (in_shape1(1) > in_shape2(1)) - swap(smaller_layer, bigger_layer); - - fout << " n." << i->caffe_layer_name() << "_concat = L.Concat(n." << smaller_layer; - fout << ", n." << i->caffe_layer_name() << "_zeropad"; - fout << ");\n"; - - fout << " n." << i->caffe_layer_name() << " = L.Eltwise(n." << i->caffe_layer_name() << "_concat"; - fout << ", n." << bigger_layer; - fout << ", operation=P.Eltwise.SUM"; - fout << ");\n"; - } - else - { - std::ostringstream sout; - sout << "The dlib network contained an add_prev layer (layer idx " << i->idx << ") that adds two previous "; - sout << "layers with different output tensor dimensions. Caffe's equivalent layer, Eltwise, doesn't support "; - sout << "adding layers together with different dimensions. In the special case where the only difference is "; - sout << "in the number of channels, this converter program will add a dummy layer that outputs a tensor full of zeros "; - sout << "and concat it appropriately so this will work. However, this network you are converting has tensor dimensions "; - sout << "different in values other than the number of channels. In particular, here are the two tensor shapes (batch size, channels, rows, cols): "; - std::ostringstream sout2; - sout2 << wrap_string(sout.str()) << endl; - sout2 << trans(in_shape1); - sout2 << trans(in_shape2); - throw dlib::error(sout2.str()); - } - } - else - { - fout << " n." << i->caffe_layer_name() << " = L.Eltwise(n." << find_input_layer_caffe_name(i); - fout << ", n." << find_layer_caffe_name(i, i->attribute("tag")); - fout << ", operation=P.Eltwise.SUM"; - fout << ");\n"; - } - } - else - { - throw dlib::error("No known transformation from dlib's " + i->detail_name + " layer to caffe."); - } - } - fout << " return n.to_proto();\n\n" << endl; - - - // ----------------------------------------------------------------------------------- - // The next block of code outputs python code that populates all the filter weights. - // ----------------------------------------------------------------------------------- - - ofstream fweights(out_weights_filename, ios::binary); - fout << "def set_network_weights(net):\n"; - fout << " # populate network parameters\n"; - fout << " f = open('"<<out_weights_filename<<"', 'rb');\n"; - // iterate the layers starting with the input layer - for (auto i = layers.rbegin(); i != layers.rend(); ++i) - { - // skip input and loss layers - if (i->type == "loss" || i->type == "input") - continue; - - - if (i->detail_name == "con") - { - const long num_filters = i->attribute("num_filters"); - matrix<float> weights = trans(rowm(i->params,range(0,i->params.size()-num_filters-1))); - matrix<float> biases = trans(rowm(i->params,range(i->params.size()-num_filters, i->params.size()-1))); - fweights.write((char*)&weights(0,0), weights.size()*sizeof(float)); - fweights.write((char*)&biases(0,0), biases.size()*sizeof(float)); - - // main filter weights - fout << " p = np.fromfile(f, dtype='float32', count="<<weights.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][0].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][0].data[:] = p;\n"; - - // biases - fout << " p = np.fromfile(f, dtype='float32', count="<<biases.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][1].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][1].data[:] = p;\n"; - } - else if (i->detail_name == "fc") - { - matrix<float> weights = trans(rowm(i->params, range(0,i->params.nr()-2))); - matrix<float> biases = rowm(i->params, i->params.nr()-1); - fweights.write((char*)&weights(0,0), weights.size()*sizeof(float)); - fweights.write((char*)&biases(0,0), biases.size()*sizeof(float)); - - // main filter weights - fout << " p = np.fromfile(f, dtype='float32', count="<<weights.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][0].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][0].data[:] = p;\n"; - - // biases - fout << " p = np.fromfile(f, dtype='float32', count="<<biases.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][1].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][1].data[:] = p;\n"; - } - else if (i->detail_name == "fc_no_bias") - { - matrix<float> weights = trans(i->params); - fweights.write((char*)&weights(0,0), weights.size()*sizeof(float)); - - // main filter weights - fout << " p = np.fromfile(f, dtype='float32', count="<<weights.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][0].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][0].data[:] = p;\n"; - } - else if (i->detail_name == "affine_con" || i->detail_name == "affine_fc") - { - const long dims = i->params.size()/2; - matrix<float> gamma = trans(rowm(i->params,range(0,dims-1))); - matrix<float> beta = trans(rowm(i->params,range(dims, 2*dims-1))); - fweights.write((char*)&gamma(0,0), gamma.size()*sizeof(float)); - fweights.write((char*)&beta(0,0), beta.size()*sizeof(float)); - - // set gamma weights - fout << " p = np.fromfile(f, dtype='float32', count="<<gamma.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][0].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][0].data[:] = p;\n"; - - // set beta weights - fout << " p = np.fromfile(f, dtype='float32', count="<<beta.size()<<");\n"; - fout << " p.shape = net.params['"<<i->caffe_layer_name()<<"'][1].data.shape;\n"; - fout << " net.params['"<<i->caffe_layer_name()<<"'][1].data[:] = p;\n"; - } - else if (i->detail_name == "prelu") - { - const double param = i->params(0); - - // main filter weights - fout << " tmp = net.params['"<<i->caffe_layer_name()<<"'][0].data.view();\n"; - fout << " tmp.shape = 1;\n"; - fout << " tmp[0] = "<<param<<";\n"; - } - } - -} - -// ---------------------------------------------------------------------------------------- - -int main(int argc, char** argv) try -{ - if (argc != 6) - { - cout << "To use this program, give it an xml file generated by dlib::net_to_xml() " << endl; - cout << "and then 4 numbers that indicate the input tensor size. It will convert " << endl; - cout << "the xml file into a python file that outputs a caffe model containing the dlib model." << endl; - cout << "For example, you might run this program like this: " << endl; - cout << " ./dtoc lenet.xml 1 1 28 28" << endl; - cout << "would convert the lenet.xml model into a caffe model with an input tensor of shape(1,1,28,28)" << endl; - cout << "where the shape values are (num samples in batch, num channels, num rows, num columns)." << endl; - return 0; - } - - const long N = sa = argv[2]; - const long K = sa = argv[3]; - const long NR = sa = argv[4]; - const long NC = sa = argv[5]; - - convert_dlib_xml_to_caffe_python_code(argv[1], N, K, NR, NC); - - return 0; -} -catch(std::exception& e) -{ - cout << "\n\n*************** ERROR CONVERTING TO CAFFE ***************\n" << e.what() << endl; - return 1; -} - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - -class doc_handler : public document_handler -{ -public: - std::vector<layer> layers; - bool seen_first_tag = false; - - layer next_layer; - std::stack<string> current_tag; - long tag_id = -1; - - - virtual void start_document ( - ) - { - layers.clear(); - seen_first_tag = false; - tag_id = -1; - } - - virtual void end_document ( - ) { } - - virtual void start_element ( - const unsigned long /*line_number*/, - const std::string& name, - const dlib::attribute_list& atts - ) - { - if (!seen_first_tag) - { - if (name != "net") - throw dlib::error("The top level XML tag must be a 'net' tag."); - seen_first_tag = true; - } - - if (name == "layer") - { - next_layer = layer(); - if (atts["type"] == "skip") - { - // Don't make a new layer, just apply the tag id to the previous layer - if (layers.size() == 0) - throw dlib::error("A skip layer was found as the first layer, but the first layer should be an input layer."); - layers.back().skip_id = sa = atts["id"]; - - // We intentionally leave next_layer empty so the end_element() callback - // don't add it as another layer when called. - } - else if (atts["type"] == "tag") - { - // Don't make a new layer, just remember the tag id so we can apply it on - // the next layer. - tag_id = sa = atts["id"]; - - // We intentionally leave next_layer empty so the end_element() callback - // don't add it as another layer when called. - } - else - { - next_layer.idx = sa = atts["idx"]; - next_layer.type = atts["type"]; - if (tag_id != -1) - { - next_layer.tag_id = tag_id; - tag_id = -1; - } - } - } - else if (current_tag.size() != 0 && current_tag.top() == "layer") - { - next_layer.detail_name = name; - // copy all the XML tag's attributes into the layer struct - atts.reset(); - while (atts.move_next()) - next_layer.attributes[atts.element().key()] = sa = atts.element().value(); - } - - current_tag.push(name); - } - - virtual void end_element ( - const unsigned long /*line_number*/, - const std::string& name - ) - { - current_tag.pop(); - if (name == "layer" && next_layer.type.size() != 0) - layers.push_back(next_layer); - } - - virtual void characters ( - const std::string& data - ) - { - if (current_tag.size() == 0) - return; - - if (comp_tags_with_params.count(current_tag.top()) != 0) - { - istringstream sin(data); - sin >> next_layer.params; - } - - } - - virtual void processing_instruction ( - const unsigned long /*line_number*/, - const std::string& /*target*/, - const std::string& /*data*/ - ) - { - } -}; - -// ---------------------------------------------------------------------------------------- - -void compute_output_tensor_shapes(const matrix<long,4,1>& input_tensor_shape, std::vector<layer>& layers) -{ - DLIB_CASSERT(layers.back().type == "input"); - layers.back().output_tensor_shape = input_tensor_shape; - for (auto i = ++layers.rbegin(); i != layers.rend(); ++i) - { - const auto input_shape = find_input_layer(i).output_tensor_shape; - if (i->type == "comp") - { - if (i->detail_name == "fc" || i->detail_name == "fc_no_bias") - { - long num_outputs = i->attribute("num_outputs"); - i->output_tensor_shape = {input_shape(0), num_outputs, 1, 1}; - } - else if (i->detail_name == "con") - { - long num_filters = i->attribute("num_filters"); - long filter_nc = i->attribute("nc"); - long filter_nr = i->attribute("nr"); - long stride_x = i->attribute("stride_x"); - long stride_y = i->attribute("stride_y"); - long padding_x = i->attribute("padding_x"); - long padding_y = i->attribute("padding_y"); - long nr = 1+(input_shape(2) + 2*padding_y - filter_nr)/stride_y; - long nc = 1+(input_shape(3) + 2*padding_x - filter_nc)/stride_x; - i->output_tensor_shape = {input_shape(0), num_filters, nr, nc}; - } - else if (i->detail_name == "max_pool" || i->detail_name == "avg_pool") - { - long filter_nc = i->attribute("nc"); - long filter_nr = i->attribute("nr"); - long stride_x = i->attribute("stride_x"); - long stride_y = i->attribute("stride_y"); - long padding_x = i->attribute("padding_x"); - long padding_y = i->attribute("padding_y"); - if (filter_nc != 0) - { - long nr = 1+(input_shape(2) + 2*padding_y - filter_nr)/stride_y; - long nc = 1+(input_shape(3) + 2*padding_x - filter_nc)/stride_x; - i->output_tensor_shape = {input_shape(0), input_shape(1), nr, nc}; - } - else // if we are filtering the whole input down to one thing - { - i->output_tensor_shape = {input_shape(0), input_shape(1), 1, 1}; - } - } - else if (i->detail_name == "add_prev") - { - auto aux_shape = find_layer(i, i->attribute("tag")).output_tensor_shape; - for (long j = 0; j < input_shape.size(); ++j) - i->output_tensor_shape(j) = std::max(input_shape(j), aux_shape(j)); - } - else - { - i->output_tensor_shape = input_shape; - } - } - else - { - i->output_tensor_shape = input_shape; - } - - } -} - -// ---------------------------------------------------------------------------------------- - -std::vector<layer> parse_dlib_xml( - const matrix<long,4,1>& input_tensor_shape, - const string& xml_filename -) -{ - doc_handler dh; - parse_xml(xml_filename, dh); - if (dh.layers.size() == 0) - throw dlib::error("No layers found in XML file!"); - - if (dh.layers.back().type != "input") - throw dlib::error("The network in the XML file is missing an input layer!"); - - compute_output_tensor_shapes(input_tensor_shape, dh.layers); - - return dh.layers; -} - -// ---------------------------------------------------------------------------------------- - diff --git a/ml/dlib/tools/convert_dlib_nets_to_caffe/running_a_dlib_model_with_caffe_example.py b/ml/dlib/tools/convert_dlib_nets_to_caffe/running_a_dlib_model_with_caffe_example.py deleted file mode 100755 index c03a7bf5c..000000000 --- a/ml/dlib/tools/convert_dlib_nets_to_caffe/running_a_dlib_model_with_caffe_example.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python - -# This script takes the dlib lenet model trained by the -# examples/dnn_introduction_ex.cpp example program and runs it using caffe. - -import caffe -import numpy as np - -# Before you run this program, you need to run dnn_introduction_ex.cpp to get a -# dlib lenet model. Then you need to convert that model into a "dlib to caffe -# model" python script. You can do this using the command line program -# included with dlib: tools/convert_dlib_nets_to_caffe. That program will -# output a lenet_dlib_to_caffe_model.py file. You run that program like this: -# ./dtoc lenet.xml 1 1 28 28 -# and it will create the lenet_dlib_to_caffe_model.py file, which we import -# with the next line: -import lenet_dlib_to_caffe_model as dlib_model - -# lenet_dlib_to_caffe_model defines a function, save_as_caffe_model() that does -# the work of converting dlib's DNN model to a caffe model and saves it to disk -# in two files. These files are all you need to run the model with caffe. -dlib_model.save_as_caffe_model('dlib_model_def.prototxt', 'dlib_model.proto') - -# Now that we created the caffe model files, we can load them into a caffe Net object. -net = caffe.Net('dlib_model_def.prototxt', 'dlib_model.proto', caffe.TEST); - - -# Now lets do a test, we will run one of the MNIST images through the network. - -# An MNIST image of a 7, it is the very first testing image in MNIST (i.e. wrt dnn_introduction_ex.cpp, it is testing_images[0]) -data = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0,84,185,159,151,60,36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0,222,254,254,254,254,241,198,198,198,198,198,198,198,198,170,52, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0,67,114,72,114,163,227,254,225,254,254,254,250,229,254,254,140, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,17,66,14,67,67,67,59,21,236,254,106, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,83,253,209,18, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,22,233,255,83, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,129,254,238,44, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,59,249,254,62, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,133,254,187,5, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,9,205,248,58, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,126,254,182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,75,251,240,57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,19,221,254,166, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,3,203,254,219,35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,38,254,254,77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,31,224,254,115,1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,133,254,254,52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,61,242,254,254,52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,121,254,254,219,40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,121,254,207,18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float32'); -data.shape = (dlib_model.input_batch_size, dlib_model.input_num_channels, dlib_model.input_num_rows, dlib_model.input_num_cols); - -# labels isn't logically needed but there doesn't seem to be a way to use -# caffe's Net interface without providing a superfluous input array. So we do -# that here. -labels = np.ones((dlib_model.input_batch_size), dtype='float32') -# Give the image to caffe -net.set_input_arrays(data/256, labels) -# Run the data through the network and get the results. -out = net.forward() - -# Print outputs, looping over minibatch. You should see that the network -# correctly classifies the image (it's the number 7). -for i in xrange(dlib_model.input_batch_size): - print i, 'net final layer = ', out['fc1'][i] - print i, 'predicted number =', np.argmax(out['fc1'][i]) - - - |