Diffstat (limited to 'ml/dlib/tools/python/src/other.cpp')
 ml/dlib/tools/python/src/other.cpp (new, -rw-r--r--) | 268
 1 file changed, 268 insertions, 0 deletions
diff --git a/ml/dlib/tools/python/src/other.cpp b/ml/dlib/tools/python/src/other.cpp
new file mode 100644
index 00000000..3e014902
--- /dev/null
+++ b/ml/dlib/tools/python/src/other.cpp
@@ -0,0 +1,268 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+#include "opaque_types.h"
+#include <dlib/python.h>
+#include <dlib/matrix.h>
+#include <dlib/data_io.h>
+#include <dlib/sparse_vector.h>
+#include <dlib/optimization.h>
+#include <dlib/statistics/running_gradient.h>
+
+using namespace dlib;
+using namespace std;
+namespace py = pybind11;
+
+typedef std::vector<std::pair<unsigned long,double> > sparse_vect;
+
+
+void _make_sparse_vector (
+    sparse_vect& v
+)
+{
+    make_sparse_vector_inplace(v);
+}
+
+void _make_sparse_vector2 (
+    std::vector<sparse_vect>& v
+)
+{
+    for (unsigned long i = 0; i < v.size(); ++i)
+        make_sparse_vector_inplace(v[i]);
+}
+
+py::tuple _load_libsvm_formatted_data(
+    const std::string& file_name
+)
+{
+    std::vector<sparse_vect> samples;
+    std::vector<double> labels;
+    load_libsvm_formatted_data(file_name, samples, labels);
+    return py::make_tuple(samples, labels);
+}
+
+void _save_libsvm_formatted_data (
+    const std::string& file_name,
+    const std::vector<sparse_vect>& samples,
+    const std::vector<double>& labels
+)
+{
+    pyassert(samples.size() == labels.size(), "Invalid inputs");
+    save_libsvm_formatted_data(file_name, samples, labels);
+}
+
+// ----------------------------------------------------------------------------------------
+
+py::list _max_cost_assignment (
+    const matrix<double>& cost
+)
+{
+    if (cost.nr() != cost.nc())
+        throw dlib::error("The input matrix must be square.");
+
+    // max_cost_assignment() only works with integer matrices, so convert from
+    // double to integer.
+    const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost));
+    matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
+    return vector_to_python_list(max_cost_assignment(int_cost));
+}
+
+double _assignment_cost (
+    const matrix<double>& cost,
+    const py::list& assignment
+)
+{
+    return assignment_cost(cost, python_list_to_vector<long>(assignment));
+}
+
+// ----------------------------------------------------------------------------------------
+
+size_t py_count_steps_without_decrease (
+    py::object arr,
+    double probability_of_decrease
+)
+{
+    DLIB_CASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1);
+    return count_steps_without_decrease(python_list_to_vector<double>(arr), probability_of_decrease);
+}
+
+// ----------------------------------------------------------------------------------------
+
+size_t py_count_steps_without_decrease_robust (
+    py::object arr,
+    double probability_of_decrease,
+    double quantile_discard
+)
+{
+    DLIB_CASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1);
+    DLIB_CASSERT(0 <= quantile_discard && quantile_discard <= 1);
+    return count_steps_without_decrease_robust(python_list_to_vector<double>(arr), probability_of_decrease, quantile_discard);
+}
+
+// ----------------------------------------------------------------------------------------
+
+double probability_that_sequence_is_increasing (
+    py::object arr
+)
+{
+    DLIB_CASSERT(len(arr) > 2);
+    return probability_gradient_greater_than(python_list_to_vector<double>(arr), 0);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void hit_enter_to_continue()
+{
+    std::cout << "Hit enter to continue";
+    std::cin.get();
+}
+
+// ----------------------------------------------------------------------------------------
+
+void bind_other(py::module &m)
+{
+    m.def("max_cost_assignment", _max_cost_assignment, py::arg("cost"),
+"requires \n\
+    - cost.nr() == cost.nc() \n\
+      (i.e. the input must be a square matrix) \n\
+ensures \n\
+    - Finds and returns the solution to the following optimization problem: \n\
+ \n\
+        Maximize: f(A) == assignment_cost(cost, A) \n\
+        Subject to the following constraints: \n\
+            - The elements of A are unique.  That is, there aren't any \n\
+              elements of A which are equal. \n\
+            - len(A) == cost.nr() \n\
+ \n\
+    - Note that this function converts the input cost matrix into a 64bit fixed \n\
+      point representation.  Therefore, you should make sure that the values in \n\
+      your cost matrix can be accurately represented by 64bit fixed point values. \n\
+      If this is not the case then the solution may become inaccurate due to \n\
+      rounding error.  In general, this function will work properly when the ratio \n\
+      of the largest to the smallest value in cost is no more than about 1e16. "
+        );
+
+    m.def("assignment_cost", _assignment_cost, py::arg("cost"),py::arg("assignment"),
+"requires \n\
+    - cost.nr() == cost.nc() \n\
+      (i.e. the input must be a square matrix) \n\
+    - for all valid i: \n\
+        - 0 <= assignment[i] < cost.nr() \n\
+ensures \n\
+    - Interprets cost as a cost assignment matrix.  That is, cost[i][j] \n\
+      represents the cost of assigning i to j. \n\
+    - Interprets assignment as a particular set of assignments.  That is, \n\
+      i is assigned to assignment[i]. \n\
+    - returns the cost of the given assignment.  That is, returns \n\
+      a number which is: \n\
+        sum over i: cost[i][assignment[i]] "
+        );
+
+    m.def("make_sparse_vector", _make_sparse_vector ,
+"This function modifies its argument so that it is a properly sorted sparse vector. \n\
+This means that the elements of the sparse vector will be ordered so that pairs \n\
+with smaller indices come first.  Additionally, there won't be any pairs with \n\
+identical indices.  If such pairs were present in the input sparse vector then \n\
+their values will be added together and only one pair with their index will be \n\
+present in the output. "
+        );
+    m.def("make_sparse_vector", _make_sparse_vector2 ,
+        "This function modifies a sparse_vectors object so that all elements it contains are properly sorted sparse vectors.");
+
+    m.def("load_libsvm_formatted_data",_load_libsvm_formatted_data, py::arg("file_name"),
+"ensures \n\
+    - Attempts to read a file of the given name that should contain libsvm \n\
+      formatted data.  The data is returned as a tuple where the first tuple \n\
+      element is an array of sparse vectors and the second element is an array of \n\
+      labels. "
+        );
+
+    m.def("save_libsvm_formatted_data",_save_libsvm_formatted_data, py::arg("file_name"), py::arg("samples"), py::arg("labels"),
+"requires \n\
+    - len(samples) == len(labels) \n\
+ensures \n\
+    - saves the data to the given file in libsvm format "
+        );
+
+    m.def("hit_enter_to_continue", hit_enter_to_continue,
+        "Asks the user to hit enter to continue and pauses until they do so.");
+
+
+
+
+    m.def("count_steps_without_decrease",py_count_steps_without_decrease, py::arg("time_series"), py::arg("probability_of_decrease")=0.51,
+"requires \n\
+    - time_series must be a one dimensional array of real numbers. \n\
+    - 0.5 < probability_of_decrease < 1 \n\
+ensures \n\
+    - If you think of the contents of time_series as a potentially noisy time \n\
+      series, then this function returns a count of how long the time series has \n\
+      gone without noticeably decreasing in value.  It does this by scanning along \n\
+      the elements, starting from the end (i.e. time_series[-1]) to the beginning, \n\
+      and checking how many elements you need to examine before you are confident \n\
+      that the series has been decreasing in value.  Here, \"confident of decrease\" \n\
+      means the probability of decrease is >= probability_of_decrease. \n\
+    - Setting probability_of_decrease to 0.51 means we count until we see even a \n\
+      small hint of decrease, whereas a larger value of 0.99 would return a larger \n\
+      count since it keeps going until it is nearly certain the time series is \n\
+      decreasing. \n\
+    - The max possible output from this function is len(time_series). \n\
+    - The implementation of this function is done using the dlib::running_gradient \n\
+      object, which is a tool that finds the least squares fit of a line to the \n\
+      time series and the confidence interval around the slope of that line.  That \n\
+      can then be used in a simple statistical test to determine if the slope is \n\
+      positive or negative."
+        /*!
+            requires
+                - time_series must be a one dimensional array of real numbers.
+                - 0.5 < probability_of_decrease < 1
+            ensures
+                - If you think of the contents of time_series as a potentially noisy time
+                  series, then this function returns a count of how long the time series has
+                  gone without noticeably decreasing in value.  It does this by scanning along
+                  the elements, starting from the end (i.e. time_series[-1]) to the beginning,
+                  and checking how many elements you need to examine before you are confident
+                  that the series has been decreasing in value.  Here, "confident of decrease"
+                  means the probability of decrease is >= probability_of_decrease.
+                - Setting probability_of_decrease to 0.51 means we count until we see even a
+                  small hint of decrease, whereas a larger value of 0.99 would return a larger
+                  count since it keeps going until it is nearly certain the time series is
+                  decreasing.
+                - The max possible output from this function is len(time_series).
+                - The implementation of this function is done using the dlib::running_gradient
+                  object, which is a tool that finds the least squares fit of a line to the
+                  time series and the confidence interval around the slope of that line.  That
+                  can then be used in a simple statistical test to determine if the slope is
+                  positive or negative.
+        !*/
+        );
+
+    m.def("count_steps_without_decrease_robust",py_count_steps_without_decrease_robust, py::arg("time_series"), py::arg("probability_of_decrease")=0.51, py::arg("quantile_discard")=0.1,
+"requires \n\
+    - time_series must be a one dimensional array of real numbers. \n\
+    - 0.5 < probability_of_decrease < 1 \n\
+    - 0 <= quantile_discard <= 1 \n\
+ensures \n\
+    - This function behaves just like \n\
+      count_steps_without_decrease(time_series,probability_of_decrease) except that \n\
+      it ignores values in the time series that are in the upper quantile_discard \n\
+      quantile.  So for example, if the quantile discard is 0.1 then the 10% \n\
+      largest values in the time series are ignored."
+        /*!
+            requires
+                - time_series must be a one dimensional array of real numbers.
+                - 0.5 < probability_of_decrease < 1
+                - 0 <= quantile_discard <= 1
+            ensures
+                - This function behaves just like
+                  count_steps_without_decrease(time_series,probability_of_decrease) except that
+                  it ignores values in the time series that are in the upper quantile_discard
+                  quantile.  So for example, if the quantile discard is 0.1 then the 10%
+                  largest values in the time series are ignored.
+        !*/
+        );
+
+    m.def("probability_that_sequence_is_increasing",probability_that_sequence_is_increasing, py::arg("time_series"),
+        "returns the probability that the given sequence of real numbers is increasing in value over time.");
+}
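
For reference, here are minimal usage sketches of the bindings this file adds, written against the Python API it exposes. First, the assignment functions; this assumes the dlib module built from this tree is importable, and the cost values are made up for illustration:

    import dlib

    # cost[i][j] is the value gained by assigning row i to column j.
    cost = dlib.matrix([[1, 2, 6],
                        [5, 3, 6],
                        [4, 5, 0]])

    # Find the assignment (a permutation of columns) that maximizes total gain.
    assignment = dlib.max_cost_assignment(cost)

    # assignment[i] is the column given to row i; assignment_cost() sums
    # cost[i][assignment[i]] over all i.
    print("best assignment:", assignment)
    print("total cost:     ", dlib.assignment_cost(cost, assignment))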
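Next, a sketch of the sparse vector and libsvm helpers. It assumes the dlib.sparse_vector, dlib.sparse_vectors, dlib.pair, and dlib.array container types bound elsewhere in these Python bindings; the file name tmp.libsvm is chosen purely for illustration:

    import dlib

    # Build a sparse vector with out-of-order and duplicate indices.
    v = dlib.sparse_vector()
    v.append(dlib.pair(5, 1.0))
    v.append(dlib.pair(2, 3.0))
    v.append(dlib.pair(5, 2.0))

    # Sort by index and merge duplicates: index 5 ends up holding 1.0 + 2.0.
    dlib.make_sparse_vector(v)

    samples = dlib.sparse_vectors()
    samples.append(v)
    labels = dlib.array([+1.0])

    # Round-trip the data through the libsvm text format.
    dlib.save_libsvm_formatted_data("tmp.libsvm", samples, labels)
    samples2, labels2 = dlib.load_libsvm_formatted_data("tmp.libsvm")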
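A sketch of the two step-counting bindings on a synthetic loss curve; the series values and the interpretation in the comments are illustrative only:

    import dlib

    # A made-up "training loss" that plateaus at the end, plus one outlier.
    loss = [1.0, 0.8, 0.6, 0.5, 0.45, 0.44, 9.0, 0.46, 0.45, 0.44, 0.45]

    # Count trailing steps with no statistically convincing decrease,
    # using the default probability_of_decrease=0.51.
    print(dlib.count_steps_without_decrease(loss))

    # The robust variant first discards the upper quantile of values
    # (default quantile_discard=0.1), so the lone outlier at 9.0 does
    # not reset the count.
    print(dlib.count_steps_without_decrease_robust(loss))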
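And finally the increasing-probability helper, which requires more than two samples:

    import dlib

    up = [1.0, 2.1, 2.9, 4.2, 5.0]
    down = list(reversed(up))

    # Values near 1 mean the least-squares slope is almost surely positive.
    print(dlib.probability_that_sequence_is_increasing(up))    # close to 1
    print(dlib.probability_that_sequence_is_increasing(down))  # close to 0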