summaryrefslogtreecommitdiffstats
path: root/ml/dlib/docs/docs/parsing.xml
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/docs/docs/parsing.xml')
-rw-r--r--ml/dlib/docs/docs/parsing.xml652
1 files changed, 652 insertions, 0 deletions
diff --git a/ml/dlib/docs/docs/parsing.xml b/ml/dlib/docs/docs/parsing.xml
new file mode 100644
index 000000000..b993acaae
--- /dev/null
+++ b/ml/dlib/docs/docs/parsing.xml
@@ -0,0 +1,652 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<?xml-stylesheet type="text/xsl" href="stylesheet.xsl"?>
+
+<doc>
+ <title>Parsing</title>
+
+ <!-- ************************************************************************* -->
+
+ <body>
+
+ <p>
+ This page documents the objects and functions that in some way deal with parsing or otherwise
+ manipulating text.
+ Everything here follows the same conventions as the rest of the library.
+ </p>
+
+
+
+ </body>
+
+ <!-- ************************************************************************* -->
+
+ <menu width="150">
+ <top>
+ <section>
+ <name>Objects</name>
+ <item>cmd_line_parser</item>
+ <item>config_reader</item>
+ <item>cpp_pretty_printer</item>
+ <item>cpp_tokenizer</item>
+ <item>tokenizer</item>
+ <item>xml_parser</item>
+ <item>base64</item>
+ <item>unichar</item>
+ <item>ustring</item>
+ <item>basic_utf8_ifstream</item>
+
+ </section>
+
+ <section>
+ <name>Global Functions</name>
+ <item>string_cast</item>
+ <item>string_assign</item>
+ <item>cast_to_string</item>
+ <item>pad_int_with_zeros</item>
+ <item>cast_to_wstring</item>
+ <item>wrap_string</item>
+ <item>narrow</item>
+ <item>trim</item>
+ <item>ltrim</item>
+ <item>rtrim</item>
+ <item>pad</item>
+ <item>lpad</item>
+ <item>rpad</item>
+ <item>split_on_first</item>
+ <item>split_on_last</item>
+ <item>left_substr</item>
+ <item>right_substr</item>
+ <item>split</item>
+ <item>tolower</item>
+ <item>toupper</item>
+ <item>convert_utf8_to_utf32</item>
+ <item>is_combining_char</item>
+ <item>strings_equal_ignore_case</item>
+ </section>
+ </top>
+ </menu>
+
+ <!-- ************************************************************************* -->
+ <!-- ************************************************************************* -->
+ <!-- ************************************************************************* -->
+
+ <components>
+
+
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>toupper</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to convert a string to all uppercase.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>tolower</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to convert a string to all lowercase.
+ </description>
+
+ </component>
+
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>split_on_first</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ Breaks a string into two parts. The split point is selected based
+ on the first occurrence of a delimiter character.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+ <component>
+ <name>split_on_last</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ Breaks a string into two parts. The split point is selected based
+ on the last occurrence of a delimiter character.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+ <component>
+ <name>split</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ Breaks a string into a sequence of substrings delimited
+ by a user specified set of characters.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+ <component>
+ <name>right_substr</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to return the part of a string to the right of a user supplied delimiter.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+ <component>
+ <name>left_substr</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to return the part of a string to the left of a user supplied delimiter.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>rpad</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to pad whitespace (or user specified characters) onto the rightmost end of a string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>lpad</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to pad whitespace (or user specified characters) onto the leftmost end of a string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>pad</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to pad whitespace (or user specified characters) onto the ends of a string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>rtrim</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to remove the whitespace (or user specified characters) from the rightmost end of a string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>ltrim</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to remove the whitespace (or user specified characters) from the leftmost end of a string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>trim</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function to remove the whitespace (or user specified characters) from the ends of a string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>narrow</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a function for converting a string of type std::string or std::wstring
+ to a plain std::string.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>wrap_string</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ wrap_string is a function that takes a string and breaks it into a number of
+ lines of a given length. You can use this to make a string
+ fit nicely into a command prompt window for example.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>strings_equal_ignore_case</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ This is a pair of functions to do a case insensitive comparison between strings.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>cast_to_wstring</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ cast_to_wstring is a templated function which makes it easy to convert arbitrary objects to
+ std::wstring strings. The types supported are any types that can be written to std::wostream via
+ operator&lt;&lt;.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>cast_to_string</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ cast_to_string is a templated function which makes it easy to convert arbitrary objects to
+ std::string strings. The types supported are any types that can be written to std::ostream via
+ operator&lt;&lt;.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>pad_int_with_zeros</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ Converts an integer into a string and pads it with leading zeros.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>string_cast</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ string_cast is a templated function which makes it easy to convert strings to
+ other types. The types supported are any types that can be read by the basic_istream operator&gt;&gt;. It
+ also supports casting between wstring, string, and ustring objects.
+ </description>
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>string_assign</name>
+ <file>dlib/string.h</file>
+ <spec_file link="true">dlib/string/string_abstract.h</spec_file>
+ <description>
+ string_assign is an object which makes it easy to convert strings to
+ other types. The types supported are any types that can be read by the basic_istream operator&gt;&gt;. It
+ also supports casting between wstring, string, and ustring objects. Since
+ string_assign is a simple stateless object there is a global instance of it
+ called dlib::sa.
+ </description>
+ <examples>
+ <example>config_reader_ex.cpp.html</example>
+ </examples>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>unichar</name>
+ <file>dlib/unicode.h</file>
+ <spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
+ <description>
+ This is a typedef for an unsigned 32-bit integer which we use to store
+ Unicode values.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>basic_utf8_ifstream</name>
+ <file>dlib/unicode.h</file>
+ <spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
+ <description>
+ This object represents an input file stream much like the
+ normal std::ifstream except that it knows how to read UTF-8
+ data. So when you read characters out of this stream it will
+ automatically convert them from the UTF-8 multibyte encoding
+ into a fixed width wide character encoding.
+
+ <p>
+ There are also two typedefs of this object. The first is utf8_wifstream, which
+ reads into wchar_t wide characters. The second is utf8_uifstream,
+ which reads into unichar characters instead of wchar_t.
+ </p>
+ </description>
+
+ </component>
+
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>ustring</name>
+ <file>dlib/unicode.h</file>
+ <spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
+ <description>
+ This is a typedef for a std::basic_string&lt;unichar&gt;. That is, it is a typedef
+ for a string object that stores unichar Unicode characters.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>is_combining_char</name>
+ <file>dlib/unicode.h</file>
+ <spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
+ <description>
+ This is a global function that can tell you if a character is a Unicode
+ combining character or not.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>convert_utf8_to_utf32</name>
+ <file>dlib/unicode.h</file>
+ <spec_file link="true">dlib/unicode/unicode_abstract.h</spec_file>
+ <description>
+ This is a global function that can convert UTF-8 strings into strings
+ of 32-bit unichar characters.
+ </description>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>base64</name>
+ <file>dlib/base64.h</file>
+ <spec_file>dlib/base64/base64_kernel_abstract.h</spec_file>
+ <description>
+ This object allows you to encode and decode data to and from
+ the Base64 Content-Transfer-Encoding defined in section 6.8 of
+ RFC 2045.
+ </description>
+
+ <examples>
+ <example>file_to_code_ex.cpp.html</example>
+ </examples>
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component checked="true">
+ <name>cmd_line_parser</name>
+ <file>dlib/cmd_line_parser.h</file>
+ <spec_file>dlib/cmd_line_parser/cmd_line_parser_kernel_abstract.h</spec_file>
+ <description>
+ This object allows you to easily parse a command line. Note that the
+ documentation for the <a href="dlib/interfaces/cmd_line_parser_option.h.html">cmd_line_parser_option</a>
+ (the object returned by the parser's .option() function) is in a separate file.
+ <p>
+ Note also that there are standard typedefs for the ASCII and wide character versions of the
+ cmd_line_parser template. These are the <tt>command_line_parser</tt> and <tt>wcommand_line_parser</tt>
+ types respectively.
+ </p>
+ </description>
+
+ <examples>
+ <example>compress_stream_ex.cpp.html</example>
+ <example>train_object_detector.cpp.html</example>
+ </examples>
+
+ <extensions>
+ <extension>
+ <name>get_option</name>
+ <spec_file>dlib/cmd_line_parser/get_option_abstract.h</spec_file>
+ <description>This extension provides a convenience function for accessing the
+ options to a command line argument or a <a href="#config_reader">config_reader</a>. It
+ is automatically #included when using the command line parser or config reader.
+ </description>
+ </extension>
+ </extensions>
+
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>config_reader</name>
+ <file>dlib/config_reader.h</file>
+ <spec_file>dlib/config_reader/config_reader_kernel_abstract.h</spec_file>
+ <description>
+ This object is a tool for reading
+ text configuration files.
+ </description>
+
+ <examples>
+ <example>config_reader_ex.cpp.html</example>
+ </examples>
+
+ <extensions>
+ <extension>
+ <name>config_reader_thread_safe</name>
+ <spec_file>dlib/config_reader/config_reader_thread_safe_abstract.h</spec_file>
+ <description>
+ This object extends a normal config_reader by simply wrapping all
+ its member functions inside mutex locks to make it safe to use
+ in a threaded program.
+ </description>
+ </extension>
+ </extensions>
+
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>cpp_pretty_printer</name>
+ <file>dlib/cpp_pretty_printer.h</file>
+ <spec_file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_abstract.h</spec_file>
+ <description>
+ This object represents an HTML pretty printer for C++ source code.
+ </description>
+
+ <implementations>
+ <implementation>
+ <name>cpp_pretty_printer_kernel_1</name>
+ <file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_1.h</file>
+ <description>
+ This is implemented by using the <a href="#cpp_tokenizer">cpp_tokenizer</a> object.
+ This is the pretty printer I use on all the source in this library. It applies a color scheme, turns
+ include directives such as #include "file.h" into links to file.h.html and puts HTML anchor points
+ on function and class declarations. It also looks for comments starting with /*!A and puts an anchor
+ before the comment using the word following the A as the name of the anchor.
+ </description>
+
+ <typedefs>
+ <typedef>
+ <name>kernel_1a</name>
+ <description>is a typedef for cpp_pretty_printer_kernel_1</description>
+ </typedef>
+ </typedefs>
+
+ </implementation>
+ <implementation>
+ <name>cpp_pretty_printer_kernel_2</name>
+ <file>dlib/cpp_pretty_printer/cpp_pretty_printer_kernel_2.h</file>
+ <description>
+ This is implemented by using the <a href="#cpp_tokenizer">cpp_tokenizer</a> object.
+ It applies a black and white color scheme suitable
+ for printing on a black and white printer. It also places the document title
+ prominently at the top of the pretty printed source file.
+ </description>
+
+ <typedefs>
+ <typedef>
+ <name>kernel_2a</name>
+ <description>is a typedef for cpp_pretty_printer_kernel_2</description>
+ </typedef>
+ </typedefs>
+
+ </implementation>
+
+ </implementations>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component checked="true">
+ <name>cpp_tokenizer</name>
+ <file>dlib/cpp_tokenizer.h</file>
+ <spec_file>dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h</spec_file>
+ <description>
+ This object represents a simple tokenizer for C++ source code.
+ </description>
+
+ <implementations>
+ <implementation>
+ <name>cpp_tokenizer_kernel_1</name>
+ <file>dlib/cpp_tokenizer/cpp_tokenizer_kernel_1.h</file>
+ <description>
+ This is implemented by using the <a href="#tokenizer">tokenizer</a> object in the obvious way.
+ </description>
+
+ <typedefs>
+ <typedef>
+ <name>kernel_1a</name>
+ <description>is a typedef for cpp_tokenizer_kernel_1</description>
+ </typedef>
+ </typedefs>
+
+ </implementation>
+
+ </implementations>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component checked="true">
+ <name>tokenizer</name>
+ <file>dlib/tokenizer.h</file>
+ <spec_file>dlib/tokenizer/tokenizer_kernel_abstract.h</spec_file>
+ <description>
+ This object represents a simple tokenizer for textual data.
+ </description>
+
+ <implementations>
+ <implementation>
+ <name>tokenizer_kernel_1</name>
+ <file>dlib/tokenizer/tokenizer_kernel_1.h</file>
+ <description>
+ This is implemented in the obvious way.
+ </description>
+
+ <typedefs>
+ <typedef>
+ <name>kernel_1a</name>
+ <description>is a typedef for tokenizer_kernel_1</description>
+ </typedef>
+ </typedefs>
+
+ </implementation>
+
+ </implementations>
+
+ </component>
+
+ <!-- ************************************************************************* -->
+
+ <component>
+ <name>xml_parser</name>
+ <file>dlib/xml_parser.h</file>
+ <spec_file>dlib/xml_parser/xml_parser_kernel_abstract.h</spec_file>
+ <description>
+
+ This object represents a simple SAX style event driven XML parser.
+ It takes its input from an input stream object and sends events to all
+ registered document_handler and error_handler objects.
+ <br/><br/>
+
+ The xml_parser object also uses the interface classes
+ <a href="dlib/xml_parser/xml_parser_kernel_interfaces.h.html#document_handler">document_handler</a>
+ and
+ <a href="dlib/xml_parser/xml_parser_kernel_interfaces.h.html#error_handler">error_handler</a>.
+ Subclasses of these classes are passed to the xml_parser which generates events while it's
+ parsing and sends them to the appropriate handler.
+
+ </description>
+
+ <examples>
+ <example>xml_parser_ex.cpp.html</example>
+ </examples>
+ </component>
+
+
+ <!-- ************************************************************************* -->
+
+ </components>
+
+ <!-- ************************************************************************* -->
+
+
+</doc>