summary refs log tree commit diff stats
path: root/ml/dlib/dlib/data_io
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/data_io')
-rw-r--r-- ml/dlib/dlib/data_io/image_dataset_metadata.cpp | 411
-rw-r--r-- ml/dlib/dlib/data_io/image_dataset_metadata.h | 174
-rw-r--r-- ml/dlib/dlib/data_io/libsvm_io.h | 276
-rw-r--r-- ml/dlib/dlib/data_io/libsvm_io_abstract.h | 125
-rw-r--r-- ml/dlib/dlib/data_io/load_image_dataset.h | 510
-rw-r--r-- ml/dlib/dlib/data_io/load_image_dataset_abstract.h | 358
-rw-r--r-- ml/dlib/dlib/data_io/mnist.cpp | 133
-rw-r--r-- ml/dlib/dlib/data_io/mnist.h | 32
-rw-r--r-- ml/dlib/dlib/data_io/mnist_abstract.h | 46
9 files changed, 0 insertions, 2065 deletions
diff --git a/ml/dlib/dlib/data_io/image_dataset_metadata.cpp b/ml/dlib/dlib/data_io/image_dataset_metadata.cpp
deleted file mode 100644
index 390ef6a0a..000000000
--- a/ml/dlib/dlib/data_io/image_dataset_metadata.cpp
+++ /dev/null
@@ -1,411 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_IMAGE_DAtASET_METADATA_CPPh_
-#define DLIB_IMAGE_DAtASET_METADATA_CPPh_
-
-#include "image_dataset_metadata.h"
-
-#include <fstream>
-#include <sstream>
-#include "../compress_stream.h"
-#include "../base64.h"
-#include "../xml_parser.h"
-#include "../string.h"
-
-// ----------------------------------------------------------------------------------------
-
-namespace dlib
-{
- namespace image_dataset_metadata
- {
-
- // ------------------------------------------------------------------------------------
-
- const std::string get_decoded_string();
- // Writes the file image_metadata_stylesheet.xsl into the same directory as
- // main_filename (or into the current directory when main_filename contains no
- // '/' or '\' separator).  The file's contents come from get_decoded_string().
- // Throws dlib::error if the stylesheet cannot be opened or written.
- void create_image_metadata_stylesheet_file(const std::string& main_filename)
- {
-     // Keep everything up to and including the last path separator.
-     const std::string::size_type last_sep = main_filename.find_last_of("/\\");
-     const std::string directory = (last_sep == std::string::npos)
-         ? std::string()
-         : main_filename.substr(0, last_sep+1);
-     const std::string stylesheet_path = directory + "image_metadata_stylesheet.xsl";
-
-     std::ofstream stylesheet(stylesheet_path.c_str());
-     if (stylesheet.fail())
-         throw dlib::error("ERROR: Unable to open image_metadata_stylesheet.xsl for writing.");
-
-     stylesheet << get_decoded_string();
-     if (stylesheet.fail())
-         throw dlib::error("ERROR: Unable to write to image_metadata_stylesheet.xsl.");
- }
-
- // Returns a copy of text in which the five characters that have special
- // meaning in XML (& < > ' ") are replaced by their predefined entity
- // references, so the result can be placed in element content or inside a
- // quoted attribute value without corrupting the document.
- static std::string escape_for_xml (
-     const std::string& text
- )
- {
-     std::string escaped;
-     escaped.reserve(text.size());
-     for (std::string::size_type i = 0; i < text.size(); ++i)
-     {
-         switch (text[i])
-         {
-             case '&':  escaped += "&amp;";  break;
-             case '<':  escaped += "&lt;";   break;
-             case '>':  escaped += "&gt;";   break;
-             case '\'': escaped += "&apos;"; break;
-             case '"':  escaped += "&quot;"; break;
-             default:   escaped += text[i];  break;
-         }
-     }
-     return escaped;
- }
-
- // Saves meta to filename as an XML document and writes the companion
- // stylesheet next to it via create_image_metadata_stylesheet_file().
- //
- // Document layout: <dataset> contains <name>, <comment> and <images>; every
- // <image file='...'> contains one <box> per annotation.  top/left/width/height
- // are always written; the remaining box attributes are written only when they
- // differ from their default (false / 0 / UNKNOWN).  A box with a label or with
- // parts gets <label> and <part> child elements, otherwise it is self-closing.
- //
- // All user supplied strings (name, comment, file names, labels, part names)
- // are XML-escaped so that characters such as '&', '<' or '\'' cannot produce a
- // malformed document.
- //
- // Throws dlib::error if the file cannot be opened or if any write fails,
- // including the write of the closing tags.
- void save_image_dataset_metadata (
-     const dataset& meta,
-     const std::string& filename
- )
- {
-     create_image_metadata_stylesheet_file(filename);
-
-     const std::vector<image>& images = meta.images;
-
-     std::ofstream fout(filename.c_str());
-     if (!fout)
-         throw dlib::error("ERROR: Unable to open " + filename + " for writing.");
-
-     fout << "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
-     fout << "<?xml-stylesheet type='text/xsl' href='image_metadata_stylesheet.xsl'?>\n";
-     fout << "<dataset>\n";
-     fout << "<name>" << escape_for_xml(meta.name) << "</name>\n";
-     fout << "<comment>" << escape_for_xml(meta.comment) << "</comment>\n";
-     fout << "<images>\n";
-     for (unsigned long i = 0; i < images.size(); ++i)
-     {
-         fout << " <image file='" << escape_for_xml(images[i].filename) << "'>\n";
-
-         // save all the boxes
-         for (unsigned long j = 0; j < images[i].boxes.size(); ++j)
-         {
-             const box& b = images[i].boxes[j];
-             fout << " <box top='" << b.rect.top() << "' "
-                  << "left='" << b.rect.left() << "' "
-                  << "width='" << b.rect.width() << "' "
-                  << "height='" << b.rect.height() << "'";
-             if (b.difficult)
-                 fout << " difficult='" << b.difficult << "'";
-             if (b.truncated)
-                 fout << " truncated='" << b.truncated << "'";
-             if (b.occluded)
-                 fout << " occluded='" << b.occluded << "'";
-             if (b.ignore)
-                 fout << " ignore='" << b.ignore << "'";
-             if (b.angle != 0)
-                 fout << " angle='" << b.angle << "'";
-             if (b.age != 0)
-                 fout << " age='" << b.age << "'";
-             if (b.gender == FEMALE)
-                 fout << " gender='female'";
-             else if (b.gender == MALE)
-                 fout << " gender='male'";
-             if (b.pose != 0)
-                 fout << " pose='" << b.pose << "'";
-             if (b.detection_score != 0)
-                 fout << " detection_score='" << b.detection_score << "'";
-
-             if (b.has_label() || b.parts.size() != 0)
-             {
-                 fout << ">\n";
-
-                 if (b.has_label())
-                     fout << " <label>" << escape_for_xml(b.label) << "</label>\n";
-
-                 // save all the parts
-                 std::map<std::string,point>::const_iterator itr;
-                 for (itr = b.parts.begin(); itr != b.parts.end(); ++itr)
-                 {
-                     fout << " <part name='"<< escape_for_xml(itr->first) << "' x='"<< itr->second.x() <<"' y='"<< itr->second.y() <<"'/>\n";
-                 }
-
-                 fout << " </box>\n";
-             }
-             else
-             {
-                 fout << "/>\n";
-             }
-         }
-
-         fout << " </image>\n";
-
-         // Fail early instead of continuing to stream into a broken file.
-         if (!fout)
-             throw dlib::error("ERROR: Unable to write to " + filename + ".");
-     }
-     fout << "</images>\n";
-     fout << "</dataset>";
-
-     // The closing tags (and, for an empty dataset, the whole document) have
-     // not been checked yet.  Flush so buffered write errors surface here
-     // rather than being lost when the stream is destroyed.
-     fout.flush();
-     if (!fout)
-         throw dlib::error("ERROR: Unable to write to " + filename + ".");
- }
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- // XML document handler that rebuilds a dataset from parser callbacks.
- // It keeps a stack of the currently open element names and assembles one
- // image / box at a time, committing each to the dataset when its closing tag
- // is seen.  Errors detected in start_element() are rethrown as dlib::error
- // with the line number prepended.
- class doc_handler : public document_handler
- {
- // Names of the elements that are currently open, outermost (root) first.
- std::vector<std::string> ts;
- // Image and box currently being assembled; see end_element().
- image temp_image;
- box temp_box;
-
- // Reference to the dataset that receives the parsed contents.
- dataset& meta;
-
- public:
-
- // Binds the handler to the dataset it should fill in.
- doc_handler(
- dataset& metadata_
- ):
- meta(metadata_)
- {}
-
-
- // Resets the destination dataset and all intermediate state.
- virtual void start_document (
- )
- {
- meta = dataset();
- ts.clear();
- temp_image = image();
- temp_box = box();
- }
-
- // Nothing to finalize at the end of the document.
- virtual void end_document (
- )
- {
- }
-
- // Called for every opening tag.  Validates that the root is <dataset>,
- // reads the attributes of <box>, <part> (only directly inside a <box>) and
- // <image>, then pushes name onto the element stack.  Any dlib error raised
- // while doing so is rethrown with "Error on line N: " prepended.
- virtual void start_element (
- const unsigned long line_number,
- const std::string& name,
- const dlib::attribute_list& atts
- )
- {
- try
- {
- // The very first element must be the <dataset> root.
- if (ts.size() == 0)
- {
- if (name != "dataset")
- {
- std::ostringstream sout;
- sout << "Invalid XML document. Root tag must be <dataset>. Found <" << name << "> instead.";
- throw dlib::error(sout.str());
- }
- else
- {
- ts.push_back(name);
- return;
- }
- }
-
-
- if (name == "box")
- {
- // NOTE(review): 'sa' is not defined in this file; presumably it is the
- // string-to-value assignment helper from ../string.h -- confirm.
- if (atts.is_in_list("top")) temp_box.rect.top() = sa = atts["top"];
- else throw dlib::error("<box> missing required attribute 'top'");
-
- if (atts.is_in_list("left")) temp_box.rect.left() = sa = atts["left"];
- else throw dlib::error("<box> missing required attribute 'left'");
-
- // width and height are parked in right()/bottom() here and converted
- // to absolute coordinates at the end of this branch.
- if (atts.is_in_list("width")) temp_box.rect.right() = sa = atts["width"];
- else throw dlib::error("<box> missing required attribute 'width'");
-
- if (atts.is_in_list("height")) temp_box.rect.bottom() = sa = atts["height"];
- else throw dlib::error("<box> missing required attribute 'height'");
-
- // Optional attributes keep the box's default value when absent.
- if (atts.is_in_list("difficult")) temp_box.difficult = sa = atts["difficult"];
- if (atts.is_in_list("truncated")) temp_box.truncated = sa = atts["truncated"];
- if (atts.is_in_list("occluded")) temp_box.occluded = sa = atts["occluded"];
- if (atts.is_in_list("ignore")) temp_box.ignore = sa = atts["ignore"];
- if (atts.is_in_list("angle")) temp_box.angle = sa = atts["angle"];
- if (atts.is_in_list("age")) temp_box.age = sa = atts["age"];
- if (atts.is_in_list("gender"))
- {
- if (atts["gender"] == "male")
- temp_box.gender = MALE;
- else if (atts["gender"] == "female")
- temp_box.gender = FEMALE;
- else if (atts["gender"] == "unknown")
- temp_box.gender = UNKNOWN;
- else
- throw dlib::error("Invalid gender string in box attribute.");
- }
- if (atts.is_in_list("pose")) temp_box.pose = sa = atts["pose"];
- if (atts.is_in_list("detection_score")) temp_box.detection_score = sa = atts["detection_score"];
-
- // bottom = top + height - 1 and right = left + width - 1.
- temp_box.rect.bottom() += temp_box.rect.top()-1;
- temp_box.rect.right() += temp_box.rect.left()-1;
- }
- else if (name == "part" && ts.back() == "box")
- {
- point temp;
- if (atts.is_in_list("x")) temp.x() = sa = atts["x"];
- else throw dlib::error("<part> missing required attribute 'x'");
-
- if (atts.is_in_list("y")) temp.y() = sa = atts["y"];
- else throw dlib::error("<part> missing required attribute 'y'");
-
- if (atts.is_in_list("name"))
- {
- // Part names must be unique within one box.
- if (temp_box.parts.count(atts["name"])==0)
- {
- temp_box.parts[atts["name"]] = temp;
- }
- else
- {
- throw dlib::error("<part> with name '" + atts["name"] + "' is defined more than one time in a single box.");
- }
- }
- else
- {
- throw dlib::error("<part> missing required attribute 'name'");
- }
- }
- else if (name == "image")
- {
- temp_image.boxes.clear();
-
- if (atts.is_in_list("file")) temp_image.filename = atts["file"];
- else throw dlib::error("<image> missing required attribute 'file'");
- }
-
- // Every element, recognized or not, is tracked on the stack.
- ts.push_back(name);
- }
- catch (error& e)
- {
- throw dlib::error("Error on line " + cast_to_string(line_number) + ": " + e.what());
- }
- }
-
- // Called for every closing tag.  Pops the element stack and commits a
- // finished <box> to the current image (when its parent is <image>) or a
- // finished <image> to the dataset (when its parent is <images>).
- virtual void end_element (
- const unsigned long ,
- const std::string& name
- )
- {
- // NOTE(review): assumes the parser only reports balanced tags, so the
- // stack is non-empty here -- confirm.
- ts.pop_back();
- if (ts.size() == 0)
- return;
-
- if (name == "box" && ts.back() == "image")
- {
- temp_image.boxes.push_back(temp_box);
- temp_box = box();
- }
- else if (name == "image" && ts.back() == "images")
- {
- meta.images.push_back(temp_image);
- temp_image = image();
- }
- }
-
- // Called with element text.  Stores the trimmed text as the dataset name
- // or comment (for <name>/<comment> directly below the root) or as the
- // label of the current box (for a <label> directly inside a <box>).
- virtual void characters (
- const std::string& data
- )
- {
- if (ts.size() == 2 && ts[1] == "name")
- {
- meta.name = trim(data);
- }
- else if (ts.size() == 2 && ts[1] == "comment")
- {
- meta.comment = trim(data);
- }
- else if (ts.size() >= 2 && ts[ts.size()-1] == "label" &&
- ts[ts.size()-2] == "box")
- {
- temp_box.label = trim(data);
- }
- }
-
- // Processing instructions (e.g. the xml-stylesheet line) are ignored.
- virtual void processing_instruction (
- const unsigned long ,
- const std::string& ,
- const std::string&
- )
- {
- }
- };
-
- // ----------------------------------------------------------------------------------------
-
- class xml_error_handler : public error_handler
- {
- public:
- virtual void error (
- const unsigned long
- ) { }
-
- virtual void fatal_error (
- const unsigned long line_number
- )
- {
- std::ostringstream sout;
- sout << "There is a fatal error on line " << line_number << " so parsing will now halt.";
- throw dlib::error(sout.str());
- }
- };
-
- // ------------------------------------------------------------------------------------
-
- // Parses the XML file named filename and fills meta with its contents.
- // Throws dlib::error if the file cannot be opened; errors raised by the
- // document and error handlers during parsing propagate to the caller.
- void load_image_dataset_metadata (
-     dataset& meta,
-     const std::string& filename
- )
- {
-     xml_error_handler error_sink;
-     doc_handler document_sink(meta);
-
-     std::ifstream input(filename.c_str());
-     if (input.fail())
-         throw dlib::error("ERROR: unable to open " + filename + " for reading.");
-
-     xml_parser reader;
-     reader.add_document_handler(document_sink);
-     reader.add_error_handler(error_sink);
-     reader.parse(input);
- }
-
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
- // ------------------------------------------------------------------------------------
-
- // Returns the contents of the file 'image_metadata_stylesheet.xsl'.  The text
- // is embedded below as base64 encoded, compressed data; it is base64-decoded
- // and then decompressed with compress_stream::kernel_1ea on every call.
- const std::string get_decoded_string()
- {
- dlib::base64 base64_coder;
- dlib::compress_stream::kernel_1ea compressor;
- // sout and sin are reused as scratch buffers for each decoding stage.
- std::ostringstream sout;
- std::istringstream sin;
-
- // The base64 encoded data from the file 'image_metadata_stylesheet.xsl' we want to decode and return.
- sout << "PFWfgmWfCHr1DkV63lbjjeY2dCc2FbHDOVh0Kd7dkvaOfRYrOG24f0x77/5iMVq8FtE3UBxtGwSd";
- sout << "1ZHOHRSHgieNoeBv8ssJQ75RRxYtFKRY3OTPX5eKQoCN9jUaUnHnR4QZtEHgmKqXSs50Yrdd+2Ah";
- sout << "gNyarPZCiR6nvqNvCjtP2MP5FxleqNf8Fylatm2KdsXmrv5K87LYVN7i7JMkmZ++cTXYSOxDmxZi";
- sout << "OiCH8funXUdF9apDW547gCjz9HOQUI6dkz5dYUeFjfp6dFugpnaJyyprFLKq048Qk7+QiL4CNF/G";
- sout << "7e0VpBw8dMpiyRNi2fSQGSZGfIAUQKKT6+rPwQoRH2spdjsdXVWj4XQAqBX87nmqMnqjMhn/Vd1s";
- sout << "W5aoC0drwRGu3Xe3gn9vBL8hBkRXcJvEy6q/lb9bYnsLemhE5Zp/+nTmTBjfT9UFYLcsmgsjC+4n";
- sout << "Bq6h9QlpuyMYqJ8RvW8pp3mFlvXc3Yg+18t5F0hSMQfaIFYAuDPU2lVzPpY+ba0B39iu9IrPCLsS";
- sout << "+tUtSNSmQ74CtzZgKKjkTMA3nwYP2SDmZE3firq42pihT7hdU5vYkes69K8AQl8WZyLPpMww+r0z";
- sout << "+veEHPlAuxF7kL3ZvVjdB+xABwwqDe0kSRHRZINYdUfJwJdfYLyDnYoMjj6afqIJZ7QOBPZ42tV5";
- sout << "3hYOQTFwTNovOastzJJXQe1kxPg1AQ8ynmfjjJZqD0xKedlyeJybP919mVAA23UryHsq9TVlabou";
- sout << "qNl3xZW/mKKktvVsd/nuH62HIv/kgomyhaEUY5HgupupBUbQFZfyljZ5bl3g3V3Y1400Z1xTM/LL";
- sout << "LJpeLdlqoGzIe/19vAN1zUUVId9F/OLNUl3Zoar63yZERSJHcsuq/Pasisp0HIGi7rfI9EIQF7C/";
- sout << "IhLKLZsJ+LOycreQGOJALZIEZHOqxYLSXG0qaPM5bQL/MQJ2OZfwEhQgYOrjaM7oPOHHEfTq5kcO";
- sout << "daMwzefKfxrF2GXbUs0bYsEXsIGwENIUKMliFaAI4qKLxxb94oc+O3BRjWueZjZty2zKawQyTHNd";
- sout << "ltFJBUzfffdZN9Wq4zbPzntkM3U6Ys4LRztx5M15dtbhFeKx5rAf2tPXT6wU01hx7EJxBJzpvoDE";
- sout << "YwEoYVDSYulRKpgk82cHFzzUDgWXbl4paFSe1L1w8r9KHr67SYJDTUG86Lrm6LJ0rw73Xp0NAFcU";
- sout << "MKpiG9g1cHW74HYbUb/yAbtVWt40eB7M637umdo2jWz/r/vP5WnfSMXEbkyWebsa1fFceg/TLWy6";
- sout << "E8OTc4XKB48h1oFIlGagOiprxho3+F3TIcxDSwA=";
-
-
-
- // Put the data into the istream sin
- sin.str(sout.str());
- sout.str("");
-
- // Decode the base64 text into its compressed binary form
- base64_coder.decode(sin,sout);
- // Reset sin's state flags so it can be read again with the new contents.
- sin.clear();
- sin.str(sout.str());
- sout.str("");
-
- // Decompress the data into its original form
- compressor.decompress(sin,sout);
-
- // Return the decoded and decompressed data
- return sout.str();
- }
-
-
- }
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_IMAGE_DAtASET_METADATA_CPPh_
-
-
diff --git a/ml/dlib/dlib/data_io/image_dataset_metadata.h b/ml/dlib/dlib/data_io/image_dataset_metadata.h
deleted file mode 100644
index 3dac29ba6..000000000
--- a/ml/dlib/dlib/data_io/image_dataset_metadata.h
+++ /dev/null
@@ -1,174 +0,0 @@
-// Copyright (C) 2011 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_IMAGE_DAtASET_METADATA_Hh_
-#define DLIB_IMAGE_DAtASET_METADATA_Hh_
-
-#include <map>
-#include <string>
-#include <vector>
-#include "../geometry.h"
-
-// ----------------------------------------------------------------------------------------
-
-namespace dlib
-{
- namespace image_dataset_metadata
- {
-
- // ------------------------------------------------------------------------------------
-
- // Gender annotation of a box.  UNKNOWN is the value box's constructors
- // assign, i.e. it is used when no gender has been recorded.
- enum gender_t
- {
- UNKNOWN,
- MALE,
- FEMALE
- };
-
- // ------------------------------------------------------------------------------------
-
- struct box
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object represents an annotated rectangular area of an image.
- It is typically used to mark the location of an object such as a
- person, car, etc.
-
- The main variable of interest is rect. It gives the location of
- the box. All the other variables are optional.
- !*/
-
- // Creates a box with a default constructed rect, no label, no parts, all
- // flags false, all numeric fields 0 and gender UNKNOWN.
- box(
- ) :
- difficult(false),
- truncated(false),
- occluded(false),
- ignore(false),
- pose(0),
- detection_score(0),
- angle(0),
- gender(UNKNOWN),
- age(0)
- {}
-
- // Same defaults as above, but with the location set to rect_.
- box (
- const rectangle& rect_
- ) :
- rect(rect_),
- difficult(false),
- truncated(false),
- occluded(false),
- ignore(false),
- pose(0),
- detection_score(0),
- angle(0),
- gender(UNKNOWN),
- age(0)
- {}
-
- // Location of the box within the image.
- rectangle rect;
-
- // Named points attached to this box, keyed by part name (names are
- // therefore unique within a box).
- std::map<std::string,point> parts;
-
- // optional fields
- // An empty label means "no label"; see has_label().
- std::string label;
- // NOTE(review): the precise meaning of the flags and scores below is not
- // defined in this header; they default to false / 0.
- bool difficult;
- bool truncated;
- bool occluded;
- bool ignore;
- double pose;
- double detection_score;
-
- // The angle of the object in radians. Positive values indicate that the
- // object at the center of the box is rotated clockwise by angle radians. A
- // value of 0 would indicate that the object is in its "standard" upright pose.
- // Therefore, to make the object appear upright we would have to rotate the
- // image counter-clockwise by angle radians.
- double angle;
-
- gender_t gender;
- // NOTE(review): unit of age is not stated here (presumably years); 0 is
- // the default assigned by the constructors.
- double age;
-
- bool has_label() const { return label.size() != 0; }
- /*!
- ensures
- - returns true if label metadata is present and false otherwise.
- !*/
- };
-
- // ------------------------------------------------------------------------------------
-
- struct image
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object represents an annotated image.
- !*/
-
- // Creates an image entry with an empty filename and no boxes.
- image() {}
- // Creates an image entry for the file f with no boxes.
- image(const std::string& f) : filename(f) {}
-
- // Name of the image file this entry describes.
- std::string filename;
- // The annotated boxes belonging to this image.
- std::vector<box> boxes;
- };
-
- // ------------------------------------------------------------------------------------
-
- struct dataset
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object represents a labeled set of images. In particular, it
- contains the filename for each image as well as annotated boxes.
- !*/
-
- // One entry per image in the dataset.
- std::vector<image> images;
- // Free-form comment text stored with the dataset.
- std::string comment;
- // Name of the dataset.
- std::string name;
- };
-
- // ------------------------------------------------------------------------------------
-
- void save_image_dataset_metadata (
- const dataset& meta,
- const std::string& filename
- );
- /*!
- ensures
- - Writes the contents of the meta object to a file with the given
- filename. The file will be in an XML format.
- throws
- - dlib::error
- This exception is thrown if there is an error which prevents
- this function from succeeding.
- !*/
-
- // ------------------------------------------------------------------------------------
-
- void load_image_dataset_metadata (
- dataset& meta,
- const std::string& filename
- );
- /*!
- ensures
- - Attempts to interpret filename as a file containing XML formatted data
- as produced by the save_image_dataset_metadata() function. Then
- meta is loaded with the contents of the file.
- throws
- - dlib::error
- This exception is thrown if there is an error which prevents
- this function from succeeding.
- !*/
-
- // ------------------------------------------------------------------------------------
-
- }
-}
-
-// ----------------------------------------------------------------------------------------
-
-#ifdef NO_MAKEFILE
-#include "image_dataset_metadata.cpp"
-#endif
-
-#endif // DLIB_IMAGE_DAtASET_METADATA_Hh_
-
diff --git a/ml/dlib/dlib/data_io/libsvm_io.h b/ml/dlib/dlib/data_io/libsvm_io.h
deleted file mode 100644
index f365e82d7..000000000
--- a/ml/dlib/dlib/data_io/libsvm_io.h
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_LIBSVM_iO_Hh_
-#define DLIB_LIBSVM_iO_Hh_
-
-#include "libsvm_io_abstract.h"
-
-#include <fstream>
-#include <string>
-#include <utility>
-#include "../algs.h"
-#include "../matrix.h"
-#include "../string.h"
-#include "../svm/sparse_vector.h"
-#include <vector>
-
-namespace dlib
-{
- // Exception thrown by the libsvm load/save functions in this file when a
- // file cannot be opened, parsed or written.
- struct sample_data_io_error : public error
- {
- // message is forwarded unchanged to the error base class.
- sample_data_io_error(const std::string& message): error(message) {}
- };
-
-// ----------------------------------------------------------------------------------------
-
- // Reads a libsvm formatted text file into samples and labels.
- //
- // Each non-empty, non-comment line has the form "label key:value key:value ...".
- // Blank lines and lines whose first non-whitespace character is '#' are
- // skipped, and a '#' after the data ends the line.  Pairs whose value is 0 are
- // not stored.  samples and labels are cleared first and receive one entry per
- // data line, in file order.
- //
- // sample_type must be a container of (key,value) pairs with an unsigned
- // integral key type (checked at compile time).
- //
- // Throws sample_data_io_error if the file cannot be opened, if a label cannot
- // be read, or if a key is not followed by ':'.
- template <typename sample_type, typename label_type, typename alloc1, typename alloc2>
- void load_libsvm_formatted_data (
- const std::string& file_name,
- std::vector<sample_type, alloc1>& samples,
- std::vector<label_type, alloc2>& labels
- )
- {
- using namespace std;
- typedef typename sample_type::value_type pair_type;
- typedef typename basic_type<typename pair_type::first_type>::type key_type;
- typedef typename pair_type::second_type value_type;
-
- // You must use unsigned integral key types in your sparse vectors
- COMPILE_TIME_ASSERT(is_unsigned_type<key_type>::value);
-
- samples.clear();
- labels.clear();
-
- ifstream fin(file_name.c_str());
-
- if (!fin)
- throw sample_data_io_error("Unable to open file " + file_name);
-
- // These objects are reused for every line to avoid reallocating them.
- string line;
- istringstream sin;
- key_type key;
- value_type value;
- label_type label;
- sample_type sample;
- // 1-based number of the line being parsed; used in error messages.
- long line_num = 0;
- while (fin.peek() != EOF)
- {
- ++line_num;
- getline(fin, line);
-
- string::size_type pos = line.find_first_not_of(" \t\r\n");
-
- // ignore empty lines or comment lines
- if (pos == string::npos || line[pos] == '#')
- continue;
-
- // Reset the line parser: clear stale state flags, then load the new text.
- sin.clear();
- sin.str(line);
- sample.clear();
-
- sin >> label;
-
- if (!sin)
- throw sample_data_io_error("On line: " + cast_to_string(line_num) + ", error while reading file " + file_name );
-
- // eat whitespace
- sin >> ws;
-
- // Read key:value pairs until the end of the line or a trailing comment.
- while (sin.peek() != EOF && sin.peek() != '#')
- {
-
- sin >> key >> ws;
-
- // ignore what should be a : character
- if (sin.get() != ':')
- throw sample_data_io_error("On line: " + cast_to_string(line_num) + ", error while reading file " + file_name);
-
- sin >> value;
-
- // NOTE(review): if reading value fails, the pair is dropped without an
- // error and, with the stream in a failed state, the rest of the line
- // appears to be skipped as well -- confirm this is intended.
- if (sin && value != 0)
- {
- sample.insert(sample.end(), make_pair(key, value));
- }
-
- sin >> ws;
- }
-
- samples.push_back(sample);
- labels.push_back(label);
- }
-
- }
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
- // Shifts the indices of all sparse vectors in samples so that the smallest
- // index in use becomes zero.  This overload is selected when the "first"
- // member of the stored pairs is const (so keys cannot be modified in place);
- // every sample is therefore rebuilt with adjusted keys.
- //
- // As in the rest of this file, each sample is expected to be sorted by index,
- // so its first element carries its smallest index.  Empty samples contain no
- // index and are ignored when searching for the minimum (dereferencing their
- // begin() would be undefined behavior).  If samples is empty, or every sample
- // is empty, or the minimum is already zero, nothing is changed.
- template <typename sample_type, typename alloc>
- typename enable_if<is_const_type<typename sample_type::value_type::first_type> >::type
- fix_nonzero_indexing (
-     std::vector<sample_type,alloc>& samples
- )
- {
-     typedef typename sample_type::value_type pair_type;
-     typedef typename basic_type<typename pair_type::first_type>::type key_type;
-
-     // figure out the min index value, looking only at non-empty samples
-     bool found_index = false;
-     key_type min_idx = key_type();
-     for (unsigned long i = 0; i < samples.size(); ++i)
-     {
-         if (samples[i].begin() == samples[i].end())
-             continue;
-
-         const key_type first_idx = samples[i].begin()->first;
-         if (!found_index || first_idx < min_idx)
-         {
-             min_idx = first_idx;
-             found_index = true;
-         }
-     }
-
-     // Nothing to do if no sample holds an element or indexing is already zero based.
-     if (!found_index || min_idx == 0)
-         return;
-
-     // Now adjust all the samples so that their min index value is zero.
-     sample_type temp;
-     for (unsigned long i = 0; i < samples.size(); ++i)
-     {
-         // copy samples[i] into temp but make sure it has a min index of zero.
-         temp.clear();
-         typename sample_type::iterator j;
-         for (j = samples[i].begin(); j != samples[i].end(); ++j)
-         {
-             temp.insert(temp.end(), std::make_pair(j->first-min_idx, j->second));
-         }
-
-         // replace the current sample with temp.
-         samples[i].swap(temp);
-     }
- }
-
-// ----------------------------------------------------------------------------------------
-
-// If the "first" values in the std::pair objects are not const then we can modify them
-// directly and that is what this version of fix_nonzero_indexing() does.
-//
-// It shifts the indices of all sparse vectors in samples so that the smallest
-// index in use becomes zero.  Each sample is expected to be sorted by index, so
-// its first element carries its smallest index.  Empty samples contain no index
-// and are ignored when searching for the minimum (dereferencing their begin()
-// would be undefined behavior).  If samples is empty, or every sample is empty,
-// or the minimum is already zero, nothing is changed.
- template <typename sample_type, typename alloc>
- typename disable_if<is_const_type<typename sample_type::value_type::first_type> >::type
- fix_nonzero_indexing (
-     std::vector<sample_type,alloc>& samples
- )
- {
-     typedef typename sample_type::value_type pair_type;
-     typedef typename basic_type<typename pair_type::first_type>::type key_type;
-
-     // figure out the min index value, looking only at non-empty samples
-     bool found_index = false;
-     key_type min_idx = key_type();
-     for (unsigned long i = 0; i < samples.size(); ++i)
-     {
-         if (samples[i].begin() == samples[i].end())
-             continue;
-
-         const key_type first_idx = samples[i].begin()->first;
-         if (!found_index || first_idx < min_idx)
-         {
-             min_idx = first_idx;
-             found_index = true;
-         }
-     }
-
-     // Nothing to do if no sample holds an element or indexing is already zero based.
-     if (!found_index || min_idx == 0)
-         return;
-
-     // Now adjust all the samples so that their min index value is zero.
-     for (unsigned long i = 0; i < samples.size(); ++i)
-     {
-         typename sample_type::iterator j;
-         for (j = samples[i].begin(); j != samples[i].end(); ++j)
-         {
-             j->first -= min_idx;
-         }
-     }
- }
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
-// Sparse vector overload: writes one line per sample in libsvm format,
-// "label key:value key:value ...", omitting pairs whose value is 0.  Numbers are
-// written with a precision of 14 digits.  Throws sample_data_io_error if the
-// file cannot be opened or a write fails.
- template <typename sample_type, typename label_type, typename alloc1, typename alloc2>
- typename disable_if<is_matrix<sample_type>,void>::type save_libsvm_formatted_data (
-     const std::string& file_name,
-     const std::vector<sample_type, alloc1>& samples,
-     const std::vector<label_type, alloc2>& labels
- )
- {
-     typedef typename sample_type::value_type pair_type;
-     typedef typename basic_type<typename pair_type::first_type>::type key_type;
-     typedef typename sample_type::const_iterator pair_iterator;
-
-     // Sparse vectors must be keyed by an unsigned integral type.
-     COMPILE_TIME_ASSERT(is_unsigned_type<key_type>::value);
-
-     // Every sample needs exactly one label.
-     DLIB_ASSERT(samples.size() == labels.size(),
-         "\t void save_libsvm_formatted_data()"
-         << "\n\t You have to have labels for each sample and vice versa"
-         << "\n\t samples.size(): " << samples.size()
-         << "\n\t labels.size(): " << labels.size()
-     );
-
-     std::ofstream out(file_name.c_str());
-     out.precision(14);
-
-     if (out.fail())
-         throw sample_data_io_error("Unable to open file " + file_name);
-
-     for (unsigned long row = 0; row < samples.size(); ++row)
-     {
-         out << labels[row];
-
-         const pair_iterator last = samples[row].end();
-         for (pair_iterator item = samples[row].begin(); item != last; ++item)
-         {
-             // zero valued entries are implicit in the libsvm format
-             if (item->second == 0)
-                 continue;
-             out << " " << item->first << ":" << item->second;
-         }
-         out << "\n";
-
-         if (out.fail())
-             throw sample_data_io_error("Error while writing to file " + file_name);
-     }
- }
-
-// ----------------------------------------------------------------------------------------
-
-// Dense vector overload: writes one line per sample in libsvm format,
-// "label index:value index:value ...", using the element position as the index
-// and omitting elements equal to 0.  Numbers are written with a precision of 14
-// digits.  Throws sample_data_io_error if the file cannot be opened or a write
-// fails.
- template <typename sample_type, typename label_type, typename alloc1, typename alloc2>
- typename enable_if<is_matrix<sample_type>,void>::type save_libsvm_formatted_data (
-     const std::string& file_name,
-     const std::vector<sample_type, alloc1>& samples,
-     const std::vector<label_type, alloc2>& labels
- )
- {
-     // Every sample needs exactly one label.
-     DLIB_ASSERT(samples.size() == labels.size(),
-         "\t void save_libsvm_formatted_data()"
-         << "\n\t You have to have labels for each sample and vice versa"
-         << "\n\t samples.size(): " << samples.size()
-         << "\n\t labels.size(): " << labels.size()
-     );
-
-     std::ofstream out(file_name.c_str());
-     out.precision(14);
-
-     if (out.fail())
-         throw sample_data_io_error("Unable to open file " + file_name);
-
-     for (unsigned long row = 0; row < samples.size(); ++row)
-     {
-         out << labels[row];
-
-         for (long idx = 0; idx < samples[row].size(); ++idx)
-         {
-             // zero valued entries are implicit in the libsvm format
-             if (samples[row](idx) == 0)
-                 continue;
-             out << " " << idx << ":" << samples[row](idx);
-         }
-         out << "\n";
-
-         if (out.fail())
-             throw sample_data_io_error("Error while writing to file " + file_name);
-     }
- }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LIBSVM_iO_Hh_
-
diff --git a/ml/dlib/dlib/data_io/libsvm_io_abstract.h b/ml/dlib/dlib/data_io/libsvm_io_abstract.h
deleted file mode 100644
index 88d934fdb..000000000
--- a/ml/dlib/dlib/data_io/libsvm_io_abstract.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// Copyright (C) 2010 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_LIBSVM_iO_ABSTRACT_Hh_
-#ifdef DLIB_LIBSVM_iO_ABSTRACT_Hh_
-
-#include <fstream>
-#include <string>
-#include <utility>
-#include "../algs.h"
-#include "../matrix.h"
-#include <vector>
-
-namespace dlib
-{
- struct sample_data_io_error : public error
- {
- /*!
- This is the exception class used by the file IO functions defined below.
- !*/
- };
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_type,
- typename label_type,
- typename alloc1,
- typename alloc2
- >
- void load_libsvm_formatted_data (
- const std::string& file_name,
- std::vector<sample_type, alloc1>& samples,
- std::vector<label_type, alloc2>& labels
- );
- /*!
- requires
- - sample_type must be an STL container
- - sample_type::value_type == std::pair<T,U> where T is some kind of
- unsigned integral type
- ensures
- - attempts to read a file of the given name that should contain libsvm
- formatted data. We turn the data into sparse vectors and store it
- in samples
- - #labels.size() == #samples.size()
- - for all valid i: #labels[i] is the label for #samples[i]
- throws
- - sample_data_io_error
- This exception is thrown if there is any problem loading data from file
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_type,
- typename label_type,
- typename alloc1,
- typename alloc2
- >
- void save_libsvm_formatted_data (
- const std::string& file_name,
- const std::vector<sample_type, alloc1>& samples,
- const std::vector<label_type, alloc2>& labels
- );
- /*!
- requires
- - sample_type must be an STL container
- - sample_type::value_type == std::pair<T,U> where T is some kind of
- unsigned integral type
- - samples.size() == labels.size()
- ensures
- - saves the data to the given file in libsvm format
- throws
- - sample_data_io_error
- This exception is thrown if there is any problem saving data to file
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_type,
- typename label_type,
- typename alloc1,
- typename alloc2
- >
- void save_libsvm_formatted_data (
- const std::string& file_name,
- const std::vector<sample_type, alloc1>& samples,
- const std::vector<label_type, alloc2>& labels
- );
- /*!
- requires
- - sample_type == a dense matrix (i.e. dlib::matrix)
- - for all valid i: is_vector(samples[i]) == true
- - samples.size() == labels.size()
- ensures
- - saves the data to the given file in libsvm format
- throws
- - sample_data_io_error
- This exception is thrown if there is any problem saving data to file
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <typename sample_type, typename alloc>
- void fix_nonzero_indexing (
- std::vector<sample_type,alloc>& samples
- );
- /*!
- requires
- - samples must only contain valid sparse vectors. The definition of
- a sparse vector can be found at the top of dlib/svm/sparse_vector_abstract.h
- ensures
- - Adjusts the sparse vectors in samples so that they are zero-indexed.
- Or in other words, assume the smallest used index value in any of the sparse
- vectors is N. Then this function subtracts N from all the index values in
- samples. This is useful, for example, if you load a libsvm formatted datafile
- with features indexed from 1 rather than 0 and you would like to fix this.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LIBSVM_iO_ABSTRACT_Hh_
-
diff --git a/ml/dlib/dlib/data_io/load_image_dataset.h b/ml/dlib/dlib/data_io/load_image_dataset.h
deleted file mode 100644
index 5664d96b2..000000000
--- a/ml/dlib/dlib/data_io/load_image_dataset.h
+++ /dev/null
@@ -1,510 +0,0 @@
-// Copyright (C) 2012 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_LOAD_IMAGE_DaTASET_Hh_
-#define DLIB_LOAD_IMAGE_DaTASET_Hh_
-
-#include "load_image_dataset_abstract.h"
-#include "../misc_api.h"
-#include "../dir_nav.h"
-#include "../image_io.h"
-#include "../array.h"
-#include <vector>
-#include "../geometry.h"
-#include "image_dataset_metadata.h"
-#include <string>
-#include <set>
-#include "../image_processing/full_object_detection.h"
-#include <utility>
-#include <limits>
-#include "../image_transforms/image_pyramid.h"
-
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- class image_dataset_file
- {
- public:
- image_dataset_file(const std::string& filename)
- {
- _skip_empty_images = false;
- _have_parts = false;
- _filename = filename;
- _box_area_thresh = std::numeric_limits<double>::infinity();
- }
-
- image_dataset_file boxes_match_label(
- const std::string& label
- ) const
- {
- image_dataset_file temp(*this);
- temp._labels.insert(label);
- return temp;
- }
-
- image_dataset_file skip_empty_images(
- ) const
- {
- image_dataset_file temp(*this);
- temp._skip_empty_images = true;
- return temp;
- }
-
- image_dataset_file boxes_have_parts(
- ) const
- {
- image_dataset_file temp(*this);
- temp._have_parts = true;
- return temp;
- }
-
- image_dataset_file shrink_big_images(
- double new_box_area_thresh = 150*150
- ) const
- {
- image_dataset_file temp(*this);
- temp._box_area_thresh = new_box_area_thresh;
- return temp;
- }
-
- bool should_load_box (
- const image_dataset_metadata::box& box
- ) const
- {
- if (_have_parts && box.parts.size() == 0)
- return false;
- if (_labels.size() == 0)
- return true;
- if (_labels.count(box.label) != 0)
- return true;
- return false;
- }
-
- const std::string& get_filename() const { return _filename; }
- bool should_skip_empty_images() const { return _skip_empty_images; }
- bool should_boxes_have_parts() const { return _have_parts; }
- double box_area_thresh() const { return _box_area_thresh; }
- const std::set<std::string>& get_selected_box_labels() const { return _labels; }
-
- private:
- std::string _filename;
- std::set<std::string> _labels;
- bool _skip_empty_images;
- bool _have_parts;
- double _box_area_thresh;
-
- };
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<rectangle> >& object_locations,
- const image_dataset_file& source
- )
- {
- images.clear();
- object_locations.clear();
-
- std::vector<std::vector<rectangle> > ignored_rects;
-
- using namespace dlib::image_dataset_metadata;
- dataset data;
- load_image_dataset_metadata(data, source.get_filename());
-
- // Set the current directory to be the one that contains the
- // metadata file. We do this because the file might contain
- // file paths which are relative to this folder.
- locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
-
-
- typedef typename array_type::value_type image_type;
-
-
- image_type img;
- std::vector<rectangle> rects, ignored;
- for (unsigned long i = 0; i < data.images.size(); ++i)
- {
- double min_rect_size = std::numeric_limits<double>::infinity();
- rects.clear();
- ignored.clear();
- for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
- {
- if (source.should_load_box(data.images[i].boxes[j]))
- {
- if (data.images[i].boxes[j].ignore)
- {
- ignored.push_back(data.images[i].boxes[j].rect);
- }
- else
- {
- rects.push_back(data.images[i].boxes[j].rect);
- min_rect_size = std::min<double>(min_rect_size, rects.back().area());
- }
- }
- }
-
- if (!source.should_skip_empty_images() || rects.size() != 0)
- {
- load_image(img, data.images[i].filename);
- if (rects.size() != 0)
- {
- // if shrinking the image would still result in the smallest box being
- // bigger than the box area threshold then shrink the image.
- while(min_rect_size/2/2 > source.box_area_thresh())
- {
- pyramid_down<2> pyr;
- pyr(img);
- min_rect_size *= (1.0/2.0)*(1.0/2.0);
- for (auto&& r : rects)
- r = pyr.rect_down(r);
- for (auto&& r : ignored)
- r = pyr.rect_down(r);
- }
- while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
- {
- pyramid_down<3> pyr;
- pyr(img);
- min_rect_size *= (2.0/3.0)*(2.0/3.0);
- for (auto&& r : rects)
- r = pyr.rect_down(r);
- for (auto&& r : ignored)
- r = pyr.rect_down(r);
- }
- }
- images.push_back(img);
- object_locations.push_back(rects);
- ignored_rects.push_back(ignored);
- }
- }
-
- return ignored_rects;
- }
-
-// ----------------------------------------------------------------------------------------
-
- namespace impl
- {
- inline size_t num_non_ignored_boxes (const std::vector<mmod_rect>& rects)
- {
- size_t cnt = 0;
- for (auto& b : rects)
- {
- if (!b.ignore)
- cnt++;
- }
- return cnt;
- }
- }
-
- template <
- typename array_type
- >
- void load_image_dataset (
- array_type& images,
- std::vector<std::vector<mmod_rect> >& object_locations,
- const image_dataset_file& source
- )
- {
- images.clear();
- object_locations.clear();
-
- using namespace dlib::image_dataset_metadata;
- dataset data;
- load_image_dataset_metadata(data, source.get_filename());
-
- // Set the current directory to be the one that contains the
- // metadata file. We do this because the file might contain
- // file paths which are relative to this folder.
- locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
-
- typedef typename array_type::value_type image_type;
-
- image_type img;
- std::vector<mmod_rect> rects;
- for (unsigned long i = 0; i < data.images.size(); ++i)
- {
- double min_rect_size = std::numeric_limits<double>::infinity();
- rects.clear();
- for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
- {
- if (source.should_load_box(data.images[i].boxes[j]))
- {
- if (data.images[i].boxes[j].ignore)
- {
- rects.push_back(ignored_mmod_rect(data.images[i].boxes[j].rect));
- }
- else
- {
- rects.push_back(mmod_rect(data.images[i].boxes[j].rect));
- min_rect_size = std::min<double>(min_rect_size, rects.back().rect.area());
- }
- rects.back().label = data.images[i].boxes[j].label;
-
- }
- }
-
- if (!source.should_skip_empty_images() || impl::num_non_ignored_boxes(rects) != 0)
- {
- load_image(img, data.images[i].filename);
- if (rects.size() != 0)
- {
- // if shrinking the image would still result in the smallest box being
- // bigger than the box area threshold then shrink the image.
- while(min_rect_size/2/2 > source.box_area_thresh())
- {
- pyramid_down<2> pyr;
- pyr(img);
- min_rect_size *= (1.0/2.0)*(1.0/2.0);
- for (auto&& r : rects)
- r.rect = pyr.rect_down(r.rect);
- }
- while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
- {
- pyramid_down<3> pyr;
- pyr(img);
- min_rect_size *= (2.0/3.0)*(2.0/3.0);
- for (auto&& r : rects)
- r.rect = pyr.rect_down(r.rect);
- }
- }
- images.push_back(std::move(img));
- object_locations.push_back(std::move(rects));
- }
- }
- }
-
-// ----------------------------------------------------------------------------------------
-
-// ******* THIS FUNCTION IS DEPRECATED, you should use another version of load_image_dataset() *******
- template <
- typename image_type,
- typename MM
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array<image_type,MM>& images,
- std::vector<std::vector<rectangle> >& object_locations,
- const std::string& filename,
- const std::string& label,
- bool skip_empty_images = false
- )
- {
- image_dataset_file f(filename);
- if (label.size() != 0)
- f = f.boxes_match_label(label);
- if (skip_empty_images)
- f = f.skip_empty_images();
- return load_image_dataset(images, object_locations, f);
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<rectangle> >& object_locations,
- const std::string& filename
- )
- {
- return load_image_dataset(images, object_locations, image_dataset_file(filename));
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- void load_image_dataset (
- array_type& images,
- std::vector<std::vector<mmod_rect>>& object_locations,
- const std::string& filename
- )
- {
- load_image_dataset(images, object_locations, image_dataset_file(filename));
- }
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<full_object_detection> >& object_locations,
- const image_dataset_file& source,
- std::vector<std::string>& parts_list
- )
- {
- typedef typename array_type::value_type image_type;
- parts_list.clear();
- images.clear();
- object_locations.clear();
-
- using namespace dlib::image_dataset_metadata;
- dataset data;
- load_image_dataset_metadata(data, source.get_filename());
-
- // Set the current directory to be the one that contains the
- // metadata file. We do this because the file might contain
- // file paths which are relative to this folder.
- locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
-
-
- std::set<std::string> all_parts;
-
- // find out what parts are being used in the dataset. Store results in all_parts.
- for (unsigned long i = 0; i < data.images.size(); ++i)
- {
- for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
- {
- if (source.should_load_box(data.images[i].boxes[j]))
- {
- const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
- std::map<std::string,point>::const_iterator itr;
-
- for (itr = parts.begin(); itr != parts.end(); ++itr)
- {
- all_parts.insert(itr->first);
- }
- }
- }
- }
-
- // make a mapping between part names and the integers [0, all_parts.size())
- std::map<std::string,int> parts_idx;
- for (std::set<std::string>::iterator i = all_parts.begin(); i != all_parts.end(); ++i)
- {
- parts_idx[*i] = parts_list.size();
- parts_list.push_back(*i);
- }
-
- std::vector<std::vector<rectangle> > ignored_rects;
- std::vector<rectangle> ignored;
- image_type img;
- std::vector<full_object_detection> object_dets;
- for (unsigned long i = 0; i < data.images.size(); ++i)
- {
- double min_rect_size = std::numeric_limits<double>::infinity();
- object_dets.clear();
- ignored.clear();
- for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
- {
- if (source.should_load_box(data.images[i].boxes[j]))
- {
- if (data.images[i].boxes[j].ignore)
- {
- ignored.push_back(data.images[i].boxes[j].rect);
- }
- else
- {
- std::vector<point> partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT);
-
- // populate partlist with all the parts present in this box.
- const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
- std::map<std::string,point>::const_iterator itr;
- for (itr = parts.begin(); itr != parts.end(); ++itr)
- {
- partlist[parts_idx[itr->first]] = itr->second;
- }
-
- object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist));
- min_rect_size = std::min<double>(min_rect_size, object_dets.back().get_rect().area());
- }
- }
- }
-
- if (!source.should_skip_empty_images() || object_dets.size() != 0)
- {
- load_image(img, data.images[i].filename);
- if (object_dets.size() != 0)
- {
- // if shrinking the image would still result in the smallest box being
- // bigger than the box area threshold then shrink the image.
- while(min_rect_size/2/2 > source.box_area_thresh())
- {
- pyramid_down<2> pyr;
- pyr(img);
- min_rect_size *= (1.0/2.0)*(1.0/2.0);
- for (auto&& r : object_dets)
- {
- r.get_rect() = pyr.rect_down(r.get_rect());
- for (unsigned long k = 0; k < r.num_parts(); ++k)
- r.part(k) = pyr.point_down(r.part(k));
- }
- for (auto&& r : ignored)
- {
- r = pyr.rect_down(r);
- }
- }
- while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
- {
- pyramid_down<3> pyr;
- pyr(img);
- min_rect_size *= (2.0/3.0)*(2.0/3.0);
- for (auto&& r : object_dets)
- {
- r.get_rect() = pyr.rect_down(r.get_rect());
- for (unsigned long k = 0; k < r.num_parts(); ++k)
- r.part(k) = pyr.point_down(r.part(k));
- }
- for (auto&& r : ignored)
- {
- r = pyr.rect_down(r);
- }
- }
- }
- images.push_back(img);
- object_locations.push_back(object_dets);
- ignored_rects.push_back(ignored);
- }
- }
-
-
- return ignored_rects;
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<full_object_detection> >& object_locations,
- const image_dataset_file& source
- )
- {
- std::vector<std::string> parts_list;
- return load_image_dataset(images, object_locations, source, parts_list);
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<full_object_detection> >& object_locations,
- const std::string& filename
- )
- {
- std::vector<std::string> parts_list;
- return load_image_dataset(images, object_locations, image_dataset_file(filename), parts_list);
- }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LOAD_IMAGE_DaTASET_Hh_
-
diff --git a/ml/dlib/dlib/data_io/load_image_dataset_abstract.h b/ml/dlib/dlib/data_io/load_image_dataset_abstract.h
deleted file mode 100644
index b06252098..000000000
--- a/ml/dlib/dlib/data_io/load_image_dataset_abstract.h
+++ /dev/null
@@ -1,358 +0,0 @@
-// Copyright (C) 2012 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_LOAD_IMAGE_DaTASET_ABSTRACT_Hh_
-#ifdef DLIB_LOAD_IMAGE_DaTASET_ABSTRACT_Hh_
-
-#include "image_dataset_metadata.h"
-#include "../array/array_kernel_abstract.h"
-#include <string>
-#include <vector>
-#include "../image_processing/full_object_detection_abstract.h"
-
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- class image_dataset_file
- {
- /*!
- WHAT THIS OBJECT REPRESENTS
- This object is a tool used to tell the load_image_dataset() functions which
- boxes and images to load from an XML based image dataset file. By default,
- this object tells load_image_dataset() to load all images and object boxes.
- !*/
-
- public:
- image_dataset_file(
- const std::string& filename
- );
- /*!
- ensures
- - #get_filename() == filename
- - #should_skip_empty_images() == false
- - #get_selected_box_labels().size() == 0
- This means that, initially, all boxes will be loaded. Therefore, for all
- possible boxes B we have:
- - #should_load_box(B) == true
- - #box_area_thresh() == infinity
- !*/
-
- const std::string& get_filename(
- ) const;
- /*!
- ensures
- - returns the name of the XML image dataset metadata file given to this
- object's constructor.
- !*/
-
- bool should_skip_empty_images(
- ) const;
- /*!
- ensures
- - returns true if we are supposed to skip images that don't have any
- non-ignored boxes to load when loading an image dataset using
- load_image_dataset().
- !*/
-
- image_dataset_file boxes_match_label(
- const std::string& label
- ) const;
- /*!
- ensures
- - returns a copy of *this that is identical in all respects to *this except
- that label will be included in the labels set (i.e. the set returned by
- get_selected_box_labels()).
- !*/
-
- const std::set<std::string>& get_selected_box_labels(
- ) const;
- /*!
- ensures
- - returns the set of box labels currently selected by the should_load_box()
- method. Note that if the set is empty then we select all boxes.
- !*/
-
- image_dataset_file skip_empty_images(
- ) const;
- /*!
- ensures
- - returns a copy of *this that is identical in all respects to *this except
- that #should_skip_empty_images() == true.
- !*/
-
- bool should_boxes_have_parts(
- ) const;
- /*!
- ensures
- - returns true if boxes must have some parts defined for them to be loaded.
- !*/
-
- image_dataset_file boxes_have_parts(
- ) const;
- /*!
- ensures
- - returns a copy of *this that is identical in all respects to *this except
- that #should_boxes_have_parts() == true.
- !*/
-
- bool should_load_box (
- const image_dataset_metadata::box& box
- ) const;
- /*!
- ensures
- - returns true if we are supposed to load the given box from an image
- dataset XML file. In particular, if should_load_box() returns false then
- the load_image_dataset() routines will not return the box at all, neither
- in the ignore rectangles list or in the primary object_locations vector.
- The behavior of this function is defined as follows:
- - if (should_boxes_have_parts() && boxes.parts.size() == 0) then
- - returns false
- - else if (get_selected_box_labels().size() == 0) then
- - returns true
- - else if (get_selected_box_labels().count(box.label) != 0) then
- - returns true
- - else
- - returns false
- !*/
-
- image_dataset_file shrink_big_images(
- double new_box_area_thresh = 150*150
- ) const;
- /*!
- ensures
- - returns a copy of *this that is identical in all respects to *this except
- that #box_area_thresh() == new_box_area_thresh
- !*/
-
- double box_area_thresh(
- ) const;
- /*!
- ensures
- - If the smallest non-ignored rectangle in an image has an area greater
- than box_area_thresh() then we will shrink the image until the area of
- the box is about equal to box_area_thresh(). This is useful if you have
- a dataset containing very high resolution images and you don't want to
- load it in its native high resolution. Setting the box_area_thresh()
- allows you to control the resolution of the loaded images.
- !*/
- };
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<rectangle> >& object_locations,
- const image_dataset_file& source
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - This routine loads the images and their associated object boxes from the
- image metadata file indicated by source.get_filename(). This metadata file
- should be in the XML format used by the save_image_dataset_metadata() routine.
- - #images.size() == The number of images loaded from the metadata file. This
- is all the images listed in the file unless source.should_skip_empty_images()
- is set to true.
- - #images.size() == #object_locations.size()
- - This routine is capable of loading any image format which can be read by the
- load_image() routine.
- - let IGNORED_RECTS denote the vector returned from this function.
- - IGNORED_RECTS.size() == #object_locations.size()
- - IGNORED_RECTS == a list of the rectangles which have the "ignore" flag set to
- true in the input XML file.
- - for all valid i:
- - #images[i] == a copy of the i-th image from the dataset.
- - #object_locations[i] == a vector of all the rectangles associated with
- #images[i]. These are the rectangles for which source.should_load_box()
- returns true and are also not marked as "ignore" in the XML file.
- - IGNORED_RECTS[i] == A vector of all the rectangles associated with #images[i]
- that are marked as "ignore" but not discarded by source.should_load_box().
- - if (source.should_skip_empty_images() == true) then
- - #object_locations[i].size() != 0
- (i.e. we won't load images that don't end up having any object locations)
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<rectangle> >& object_locations,
- const std::string& filename
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - performs: return load_image_dataset(images, object_locations, image_dataset_file(filename));
- (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- void load_image_dataset (
- array_type& images,
- std::vector<std::vector<mmod_rect> >& object_locations,
- const image_dataset_file& source
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - This function has essentially the same behavior as the above
- load_image_dataset() routines, except here we output to a vector of
- mmod_rects instead of rectangles. In this case, both ignore and non-ignore
- rectangles go into object_locations since mmod_rect has an ignore boolean
- field that records the ignored/non-ignored state of each rectangle. We also store
- a each box's string label into the mmod_rect::label field as well.
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- void load_image_dataset (
- array_type& images,
- std::vector<std::vector<mmod_rect> >& object_locations,
- const std::string& filename
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - performs: load_image_dataset(images, object_locations, image_dataset_file(filename));
- (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
- !*/
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<full_object_detection> >& object_locations,
- const image_dataset_file& source,
- std::vector<std::string>& parts_list
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - This routine loads the images and their associated object locations from the
- image metadata file indicated by source.get_filename(). This metadata file
- should be in the XML format used by the save_image_dataset_metadata() routine.
- - The difference between this function and the version of load_image_dataset()
- defined above is that this version will also load object part information and
- thus fully populates the full_object_detection objects.
- - #images.size() == The number of images loaded from the metadata file. This
- is all the images listed in the file unless source.should_skip_empty_images()
- is set to true.
- - #images.size() == #object_locations.size()
- - This routine is capable of loading any image format which can be read
- by the load_image() routine.
- - #parts_list == a vector that contains the list of object parts found in the
- input file and loaded into object_locations.
- - #parts_list is in lexicographic sorted order.
- - let IGNORED_RECTS denote the vector returned from this function.
- - IGNORED_RECTS.size() == #object_locations.size()
- - IGNORED_RECTS == a list of the rectangles which have the "ignore" flag set to
- true in the input XML file.
- - for all valid i:
- - #images[i] == a copy of the i-th image from the dataset.
- - #object_locations[i] == a vector of all the rectangles associated with
- #images[i]. These are the rectangles for which source.should_load_box()
- returns true and are also not marked as "ignore" in the XML file.
- - IGNORED_RECTS[i] == A vector of all the rectangles associated with #images[i]
- that are marked as "ignore" but not discarded by source.should_load_box().
- - if (source.should_skip_empty_images() == true) then
- - #object_locations[i].size() != 0
- (i.e. we won't load images that don't end up having any object locations)
- - for all valid j:
- - #object_locations[i][j].num_parts() == #parts_list.size()
- - for all valid k:
- - #object_locations[i][j].part(k) == the location of the part
- with name #parts_list[k] or OBJECT_PART_NOT_PRESENT if the
- part was not indicated for object #object_locations[i][j].
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<full_object_detection> >& object_locations,
- const image_dataset_file& source
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - performs: return load_image_dataset(images, object_locations, source, parts_list);
- (i.e. this function simply calls the above function and discards the output
- parts_list. So it is just a convenience function you can call if you don't
- care about getting the parts list.)
- !*/
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename array_type
- >
- std::vector<std::vector<rectangle> > load_image_dataset (
- array_type& images,
- std::vector<std::vector<full_object_detection> >& object_locations,
- const std::string& filename
- );
- /*!
- requires
- - array_type == An array of images. This is anything with an interface that
- looks like std::vector<some generic image type> where a "generic image" is
- anything that implements the generic image interface defined in
- dlib/image_processing/generic_image.h.
- ensures
- - performs: return load_image_dataset(images, object_locations, image_dataset_file(filename));
- (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
- !*/
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_LOAD_IMAGE_DaTASET_ABSTRACT_Hh_
-
-
diff --git a/ml/dlib/dlib/data_io/mnist.cpp b/ml/dlib/dlib/data_io/mnist.cpp
deleted file mode 100644
index d6a62fb67..000000000
--- a/ml/dlib/dlib/data_io/mnist.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_MNIST_CPp_
-#define DLIB_MNIST_CPp_
-
-#include "mnist.h"
-#include <fstream>
-#include "../byte_orderer.h"
-#include "../uintn.h"
-
-// ----------------------------------------------------------------------------------------
-
-namespace dlib
-{
- void load_mnist_dataset (
- const std::string& folder_name,
- std::vector<matrix<unsigned char> >& training_images,
- std::vector<unsigned long>& training_labels,
- std::vector<matrix<unsigned char> >& testing_images,
- std::vector<unsigned long>& testing_labels
- )
- {
- using namespace std;
- ifstream fin1((folder_name+"/train-images-idx3-ubyte").c_str(), ios::binary);
- if (!fin1)
- {
- fin1.open((folder_name + "/train-images.idx3-ubyte").c_str(), ios::binary);
- }
-
- ifstream fin2((folder_name+"/train-labels-idx1-ubyte").c_str(), ios::binary);
- if (!fin2)
- {
- fin2.open((folder_name + "/train-labels.idx1-ubyte").c_str(), ios::binary);
- }
-
- ifstream fin3((folder_name+"/t10k-images-idx3-ubyte").c_str(), ios::binary);
- if (!fin3)
- {
- fin3.open((folder_name + "/t10k-images.idx3-ubyte").c_str(), ios::binary);
- }
-
- ifstream fin4((folder_name+"/t10k-labels-idx1-ubyte").c_str(), ios::binary);
- if (!fin4)
- {
- fin4.open((folder_name + "/t10k-labels.idx1-ubyte").c_str(), ios::binary);
- }
-
- if (!fin1) throw error("Unable to open file train-images-idx3-ubyte or train-images.idx3-ubyte");
- if (!fin2) throw error("Unable to open file train-labels-idx1-ubyte or train-labels.idx1-ubyte");
- if (!fin3) throw error("Unable to open file t10k-images-idx3-ubyte or t10k-images.idx3-ubyte");
- if (!fin4) throw error("Unable to open file t10k-labels-idx1-ubyte or t10k-labels.idx1-ubyte");
-
- byte_orderer bo;
-
- // make sure the files have the contents we expect.
- uint32 magic, num, nr, nc, num2, num3, num4;
- fin1.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
- fin1.read((char*)&num, sizeof(num)); bo.big_to_host(num);
- fin1.read((char*)&nr, sizeof(nr)); bo.big_to_host(nr);
- fin1.read((char*)&nc, sizeof(nc)); bo.big_to_host(nc);
- if (magic != 2051 || num != 60000 || nr != 28 || nc != 28)
- throw error("mndist dat files are corrupted.");
-
- fin2.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
- fin2.read((char*)&num2, sizeof(num2)); bo.big_to_host(num2);
- if (magic != 2049 || num2 != 60000)
- throw error("mndist dat files are corrupted.");
-
- fin3.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
- fin3.read((char*)&num3, sizeof(num3)); bo.big_to_host(num3);
- fin3.read((char*)&nr, sizeof(nr)); bo.big_to_host(nr);
- fin3.read((char*)&nc, sizeof(nc)); bo.big_to_host(nc);
- if (magic != 2051 || num3 != 10000 || nr != 28 || nc != 28)
- throw error("mndist dat files are corrupted.");
-
- fin4.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic);
- fin4.read((char*)&num4, sizeof(num4)); bo.big_to_host(num4);
- if (magic != 2049 || num4 != 10000)
- throw error("mndist dat files are corrupted.");
-
- if (!fin1) throw error("Unable to read train-images-idx3-ubyte");
- if (!fin2) throw error("Unable to read train-labels-idx1-ubyte");
- if (!fin3) throw error("Unable to read t10k-images-idx3-ubyte");
- if (!fin4) throw error("Unable to read t10k-labels-idx1-ubyte");
-
-
- training_images.resize(60000);
- training_labels.resize(60000);
- testing_images.resize(10000);
- testing_labels.resize(10000);
-
- for (size_t i = 0; i < training_images.size(); ++i)
- {
- training_images[i].set_size(nr,nc);
- fin1.read((char*)&training_images[i](0,0), nr*nc);
- }
- for (size_t i = 0; i < training_labels.size(); ++i)
- {
- char l;
- fin2.read(&l, 1);
- training_labels[i] = l;
- }
-
- for (size_t i = 0; i < testing_images.size(); ++i)
- {
- testing_images[i].set_size(nr,nc);
- fin3.read((char*)&testing_images[i](0,0), nr*nc);
- }
- for (size_t i = 0; i < testing_labels.size(); ++i)
- {
- char l;
- fin4.read(&l, 1);
- testing_labels[i] = l;
- }
-
- if (!fin1) throw error("Unable to read train-images-idx3-ubyte");
- if (!fin2) throw error("Unable to read train-labels-idx1-ubyte");
- if (!fin3) throw error("Unable to read t10k-images-idx3-ubyte");
- if (!fin4) throw error("Unable to read t10k-labels-idx1-ubyte");
-
- if (fin1.get() != EOF) throw error("Unexpected bytes at end of train-images-idx3-ubyte");
- if (fin2.get() != EOF) throw error("Unexpected bytes at end of train-labels-idx1-ubyte");
- if (fin3.get() != EOF) throw error("Unexpected bytes at end of t10k-images-idx3-ubyte");
- if (fin4.get() != EOF) throw error("Unexpected bytes at end of t10k-labels-idx1-ubyte");
- }
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_MNIST_CPp_
-
-
-
diff --git a/ml/dlib/dlib/data_io/mnist.h b/ml/dlib/dlib/data_io/mnist.h
deleted file mode 100644
index e71be6f2b..000000000
--- a/ml/dlib/dlib/data_io/mnist.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_MNIST_Hh_
-#define DLIB_MNIST_Hh_
-
-#include "mnist_abstract.h"
-#include <string>
-#include <vector>
-#include "../matrix.h"
-
-// ----------------------------------------------------------------------------------------
-
-namespace dlib
-{
- void load_mnist_dataset ( // Loads the MNIST dataset; the full contract is documented in mnist_abstract.h.
- const std::string& folder_name, // in: folder that should contain the four MNIST files
- std::vector<matrix<unsigned char> >& training_images, // out: the training images
- std::vector<unsigned long>& training_labels, // out: training_labels[i] is the label of training_images[i]
- std::vector<matrix<unsigned char> >& testing_images, // out: the testing images
- std::vector<unsigned long>& testing_labels // out: testing_labels[i] is the label of testing_images[i]
- );
-}
-
-// ----------------------------------------------------------------------------------------
-
-#ifdef NO_MAKEFILE
-#include "mnist.cpp"
-#endif
-
-#endif // DLIB_MNIST_Hh_
-
-
diff --git a/ml/dlib/dlib/data_io/mnist_abstract.h b/ml/dlib/dlib/data_io/mnist_abstract.h
deleted file mode 100644
index 09121633e..000000000
--- a/ml/dlib/dlib/data_io/mnist_abstract.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (C) 2015 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#undef DLIB_MNIST_ABSTRACT_Hh_
-#ifdef DLIB_MNIST_ABSTRACT_Hh_
-
-#include <string>
-#include <vector>
-#include "../matrix.h"
-
-// ----------------------------------------------------------------------------------------
-
-namespace dlib // Documentation-only: the #undef/#ifdef guard at the top of this header keeps this text from ever being compiled.
-{
- void load_mnist_dataset (
- const std::string& folder_name, // in: folder that should contain the four files listed below
- std::vector<matrix<unsigned char> >& training_images, // out: overwritten with the training images
- std::vector<unsigned long>& training_labels, // out: overwritten with the training labels
- std::vector<matrix<unsigned char> >& testing_images, // out: overwritten with the testing images
- std::vector<unsigned long>& testing_labels // out: overwritten with the testing labels
- );
- /*!
- ensures
- - Attempts to load the MNIST dataset from the hard drive. This is the dataset
- of handwritten digits available from http://yann.lecun.com/exdb/mnist/. In
- particular, the 4 files comprising the MNIST dataset should be present in the
- folder indicated by folder_name. These four files are:
- - train-images-idx3-ubyte
- - train-labels-idx1-ubyte
- - t10k-images-idx3-ubyte
- - t10k-labels-idx1-ubyte
- - #training_images == The 60,000 training images from the dataset.
- - #training_labels == The labels for the contents of #training_images.
- I.e. #training_labels[i] is the label of #training_images[i].
- - #testing_images == The 10,000 testing images from the dataset.
- - #testing_labels == The labels for the contents of #testing_images.
- I.e. #testing_labels[i] is the label of #testing_images[i].
- throws
- - dlib::error if some problem prevents us from loading the data or the files
- can't be found.
- !*/
-}
-
-// ----------------------------------------------------------------------------------------
-
-#endif // DLIB_MNIST_ABSTRACT_Hh_
-