summaryrefslogtreecommitdiffstats
path: root/ml/dlib/tools/python/src/other.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/tools/python/src/other.cpp')
-rw-r--r--ml/dlib/tools/python/src/other.cpp268
1 files changed, 268 insertions, 0 deletions
diff --git a/ml/dlib/tools/python/src/other.cpp b/ml/dlib/tools/python/src/other.cpp
new file mode 100644
index 00000000..3e014902
--- /dev/null
+++ b/ml/dlib/tools/python/src/other.cpp
@@ -0,0 +1,268 @@
+// Copyright (C) 2013 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+
+#include "opaque_types.h"
+#include <dlib/python.h>
+#include <dlib/matrix.h>
+#include <dlib/data_io.h>
+#include <dlib/sparse_vector.h>
+#include <dlib/optimization.h>
+#include <dlib/statistics/running_gradient.h>
+
+using namespace dlib;
+using namespace std;
+namespace py = pybind11;
+
+typedef std::vector<std::pair<unsigned long,double> > sparse_vect;
+
+
+void _make_sparse_vector (
+ sparse_vect& v
+)
+{
+ make_sparse_vector_inplace(v);
+}
+
+void _make_sparse_vector2 (
+ std::vector<sparse_vect>& v
+)
+{
+ for (unsigned long i = 0; i < v.size(); ++i)
+ make_sparse_vector_inplace(v[i]);
+}
+
+py::tuple _load_libsvm_formatted_data(
+ const std::string& file_name
+)
+{
+ std::vector<sparse_vect> samples;
+ std::vector<double> labels;
+ load_libsvm_formatted_data(file_name, samples, labels);
+ return py::make_tuple(samples, labels);
+}
+
+void _save_libsvm_formatted_data (
+ const std::string& file_name,
+ const std::vector<sparse_vect>& samples,
+ const std::vector<double>& labels
+)
+{
+ pyassert(samples.size() == labels.size(), "Invalid inputs");
+ save_libsvm_formatted_data(file_name, samples, labels);
+}
+
+// ----------------------------------------------------------------------------------------
+
+py::list _max_cost_assignment (
+ const matrix<double>& cost
+)
+{
+ if (cost.nr() != cost.nc())
+ throw dlib::error("The input matrix must be square.");
+
+ // max_cost_assignment() only works with integer matrices, so convert from
+ // double to integer.
+ const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost));
+ matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
+ return vector_to_python_list(max_cost_assignment(int_cost));
+}
+
+double _assignment_cost (
+ const matrix<double>& cost,
+ const py::list& assignment
+)
+{
+ return assignment_cost(cost, python_list_to_vector<long>(assignment));
+}
+
+// ----------------------------------------------------------------------------------------
+
+size_t py_count_steps_without_decrease (
+ py::object arr,
+ double probability_of_decrease
+)
+{
+ DLIB_CASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1);
+ return count_steps_without_decrease(python_list_to_vector<double>(arr), probability_of_decrease);
+}
+
+// ----------------------------------------------------------------------------------------
+
+size_t py_count_steps_without_decrease_robust (
+ py::object arr,
+ double probability_of_decrease,
+ double quantile_discard
+)
+{
+ DLIB_CASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1);
+ DLIB_CASSERT(0 <= quantile_discard && quantile_discard <= 1);
+ return count_steps_without_decrease_robust(python_list_to_vector<double>(arr), probability_of_decrease, quantile_discard);
+}
+
+// ----------------------------------------------------------------------------------------
+
+double probability_that_sequence_is_increasing (
+ py::object arr
+)
+{
+ DLIB_CASSERT(len(arr) > 2);
+ return probability_gradient_greater_than(python_list_to_vector<double>(arr), 0);
+}
+
+// ----------------------------------------------------------------------------------------
+
// Print a prompt and pause: blocks until one character (normally the enter
// key) is read from stdin.  std::cin is tied to std::cout, so the prompt is
// flushed before the read blocks.
void hit_enter_to_continue()
{
    std::cout << "Hit enter to continue";
    std::cin.get();
}
+
+// ----------------------------------------------------------------------------------------
+
// Register the miscellaneous dlib utility functions on the given pybind11
// module.  The long string literals below are the Python-visible docstrings
// (runtime data, not comments); the /*! ... !*/ blocks are the matching
// C++-side documentation kept alongside them.
void bind_other(py::module &m)
{
    // Optimal assignment problem: solver and cost evaluator.
    m.def("max_cost_assignment", _max_cost_assignment, py::arg("cost"),
"requires \n\
    - cost.nr() == cost.nc() \n\
      (i.e. the input must be a square matrix) \n\
ensures \n\
    - Finds and returns the solution to the following optimization problem: \n\
     \n\
        Maximize: f(A) == assignment_cost(cost, A) \n\
        Subject to the following constraints: \n\
            - The elements of A are unique. That is, there aren't any  \n\
              elements of A which are equal.   \n\
            - len(A) == cost.nr() \n\
     \n\
    - Note that this function converts the input cost matrix into a 64bit fixed \n\
      point representation.  Therefore, you should make sure that the values in \n\
      your cost matrix can be accurately represented by 64bit fixed point values. \n\
      If this is not the case then the solution my become inaccurate due to \n\
      rounding error.  In general, this function will work properly when the ratio \n\
      of the largest to the smallest value in cost is no more than about 1e16.   "
    );

    m.def("assignment_cost", _assignment_cost, py::arg("cost"),py::arg("assignment"),
"requires \n\
    - cost.nr() == cost.nc() \n\
      (i.e. the input must be a square matrix) \n\
    - for all valid i: \n\
        - 0 <= assignment[i] < cost.nr() \n\
ensures \n\
    - Interprets cost as a cost assignment matrix. That is, cost[i][j]  \n\
      represents the cost of assigning i to j.  \n\
    - Interprets assignment as a particular set of assignments. That is, \n\
      i is assigned to assignment[i]. \n\
    - returns the cost of the given assignment. That is, returns \n\
      a number which is: \n\
        sum over i: cost[i][assignment[i]] "
    );

    // Two overloads of make_sparse_vector: single vector first, then the
    // array-of-vectors form.  Registration order determines pybind11's
    // overload resolution order, so keep them in this order.
    m.def("make_sparse_vector", _make_sparse_vector ,
"This function modifies its argument so that it is a properly sorted sparse vector.  \n\
This means that the elements of the sparse vector will be ordered so that pairs \n\
with smaller indices come first.  Additionally, there won't be any pairs with \n\
identical indices.  If such pairs were present in the input sparse vector then \n\
their values will be added together and only one pair with their index will be \n\
present in the output.   "
    );
    m.def("make_sparse_vector", _make_sparse_vector2 ,
        "This function modifies a sparse_vectors object so that all elements it contains are properly sorted sparse vectors.");

    // libsvm file format I/O.
    m.def("load_libsvm_formatted_data",_load_libsvm_formatted_data, py::arg("file_name"),
"ensures    \n\
    - Attempts to read a file of the given name that should contain libsvm \n\
      formatted data.  The data is returned as a tuple where the first tuple \n\
      element is an array of sparse vectors and the second element is an array of \n\
      labels.    "
    );

    m.def("save_libsvm_formatted_data",_save_libsvm_formatted_data, py::arg("file_name"), py::arg("samples"), py::arg("labels"),
"requires    \n\
    - len(samples) == len(labels) \n\
ensures    \n\
    - saves the data to the given file in libsvm format "
    );

    // Console helper used by example programs.
    m.def("hit_enter_to_continue", hit_enter_to_continue,
        "Asks the user to hit enter to continue and pauses until they do so.");




    // Time-series trend analysis utilities (dlib::running_gradient based).
    m.def("count_steps_without_decrease",py_count_steps_without_decrease, py::arg("time_series"), py::arg("probability_of_decrease")=0.51,
"requires \n\
    - time_series must be a one dimensional array of real numbers.  \n\
    - 0.5 < probability_of_decrease < 1 \n\
ensures \n\
    - If you think of the contents of time_series as a potentially noisy time \n\
      series, then this function returns a count of how long the time series has \n\
      gone without noticeably decreasing in value.  It does this by scanning along \n\
      the elements, starting from the end (i.e. time_series[-1]) to the beginning,  \n\
      and checking how many elements you need to examine before you are confident \n\
      that the series has been decreasing in value.  Here, \"confident of decrease\" \n\
      means the probability of decrease is >= probability_of_decrease.   \n\
    - Setting probability_of_decrease to 0.51 means we count until we see even a \n\
      small hint of decrease, whereas a larger value of 0.99 would return a larger \n\
      count since it keeps going until it is nearly certain the time series is \n\
      decreasing. \n\
    - The max possible output from this function is len(time_series). \n\
    - The implementation of this function is done using the dlib::running_gradient \n\
      object, which is a tool that finds the least squares fit of a line to the \n\
      time series and the confidence interval around the slope of that line.  That \n\
      can then be used in a simple statistical test to determine if the slope is \n\
      positive or negative."
    /*!
        requires
            - time_series must be a one dimensional array of real numbers. 
            - 0.5 < probability_of_decrease < 1
        ensures
            - If you think of the contents of time_series as a potentially noisy time
              series, then this function returns a count of how long the time series has
              gone without noticeably decreasing in value.  It does this by scanning along
              the elements, starting from the end (i.e. time_series[-1]) to the beginning, 
              and checking how many elements you need to examine before you are confident
              that the series has been decreasing in value.  Here, "confident of decrease"
              means the probability of decrease is >= probability_of_decrease.  
            - Setting probability_of_decrease to 0.51 means we count until we see even a
              small hint of decrease, whereas a larger value of 0.99 would return a larger
              count since it keeps going until it is nearly certain the time series is
              decreasing.
            - The max possible output from this function is len(time_series).
            - The implementation of this function is done using the dlib::running_gradient
              object, which is a tool that finds the least squares fit of a line to the
              time series and the confidence interval around the slope of that line.  That
              can then be used in a simple statistical test to determine if the slope is
              positive or negative.
    !*/
    );

    m.def("count_steps_without_decrease_robust",py_count_steps_without_decrease_robust, py::arg("time_series"), py::arg("probability_of_decrease")=0.51, py::arg("quantile_discard")=0.1,
"requires \n\
    - time_series must be a one dimensional array of real numbers.  \n\
    - 0.5 < probability_of_decrease < 1 \n\
    - 0 <= quantile_discard <= 1 \n\
ensures \n\
    - This function behaves just like \n\
      count_steps_without_decrease(time_series,probability_of_decrease) except that \n\
      it ignores values in the time series that are in the upper quantile_discard \n\
      quantile.  So for example, if the quantile discard is 0.1 then the 10% \n\
      largest values in the time series are ignored."
    /*!
        requires
            - time_series must be a one dimensional array of real numbers. 
            - 0.5 < probability_of_decrease < 1
            - 0 <= quantile_discard <= 1
        ensures
            - This function behaves just like
              count_steps_without_decrease(time_series,probability_of_decrease) except that
              it ignores values in the time series that are in the upper quantile_discard
              quantile.  So for example, if the quantile discard is 0.1 then the 10%
              largest values in the time series are ignored.
    !*/
    );

    m.def("probability_that_sequence_is_increasing",probability_that_sequence_is_increasing, py::arg("time_series"),
        "returns the probability that the given sequence of real numbers is increasing in value over time.");
}
+