summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/tokenizer
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/tokenizer')
-rw-r--r--ml/dlib/dlib/tokenizer/tokenizer_kernel_1.cpp295
-rw-r--r--ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h155
-rw-r--r--ml/dlib/dlib/tokenizer/tokenizer_kernel_abstract.h289
-rw-r--r--ml/dlib/dlib/tokenizer/tokenizer_kernel_c.h167
4 files changed, 906 insertions, 0 deletions
diff --git a/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.cpp b/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.cpp
new file mode 100644
index 000000000..daa83184c
--- /dev/null
+++ b/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.cpp
@@ -0,0 +1,295 @@
+// Copyright (C) 2005 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_TOKENIZER_KERNEL_1_CPp_
+#define DLIB_TOKENIZER_KERNEL_1_CPp_
+#include "tokenizer_kernel_1.h"
+
+#include <iostream>
+#include <cstdio>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ tokenizer_kernel_1::
+ tokenizer_kernel_1 (
+ ) :
+ headset(0),
+ bodyset(0),
+ have_peeked(false)
+ {
+ try
+ {
+ headset = new bool[UCHAR_MAX];
+ bodyset = new bool[UCHAR_MAX];
+
+ clear();
+ }
+ catch (...)
+ {
+ if (headset) delete [] headset;
+ if (bodyset) delete [] bodyset;
+ throw;
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ tokenizer_kernel_1::
+ ~tokenizer_kernel_1 (
+ )
+ {
+ delete [] bodyset;
+ delete [] headset;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void tokenizer_kernel_1::
+ clear(
+ )
+ {
+ using namespace std;
+
+ in = 0;
+ streambuf = 0;
+ have_peeked = false;
+
+ head = "_" + lowercase_letters() + uppercase_letters();
+ body = "_" + lowercase_letters() + uppercase_letters() + numbers();
+
+ for (unsigned long i = 0; i < UCHAR_MAX; ++i)
+ {
+ headset[i] = false;
+ bodyset[i] = false;
+ }
+
+ for (string::size_type i = 0; i < head.size(); ++i)
+ headset[static_cast<unsigned char>(head[i])] = true;
+ for (string::size_type i = 0; i < body.size(); ++i)
+ bodyset[static_cast<unsigned char>(body[i])] = true;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void tokenizer_kernel_1::
+ set_stream (
+ std::istream& in_
+ )
+ {
+ in = &in_;
+ streambuf = in_.rdbuf();
+ have_peeked = false;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ bool tokenizer_kernel_1::
+ stream_is_set (
+ ) const
+ {
+ return (in != 0);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ std::istream& tokenizer_kernel_1::
+ get_stream (
+ ) const
+ {
+ return *in;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void tokenizer_kernel_1::
+ get_token (
+ int& type,
+ std::string& token
+ )
+ {
+ if (!have_peeked)
+ {
+ std::streambuf::int_type ch;
+ ch = streambuf->sbumpc();
+
+ switch (ch)
+ {
+ case EOF:
+ type = END_OF_FILE;
+ token.clear();
+ return;
+
+ case '\n':
+ type = END_OF_LINE;
+ token = "\n";
+ return;
+
+ case '\r':
+ case ' ':
+ case '\t':
+ type = WHITE_SPACE;
+ token = static_cast<char>(ch);
+ ch = streambuf->sgetc();
+ while ((ch == ' ' || ch == '\t' || ch == '\r') && ch != EOF)
+ {
+ token += static_cast<char>(ch);
+ ch = streambuf->snextc();
+ }
+ return;
+
+ default:
+ if (headset[static_cast<unsigned char>(ch)])
+ {
+ type = IDENTIFIER;
+ token = static_cast<char>(ch);
+ ch = streambuf->sgetc();
+ while ( bodyset[static_cast<unsigned char>(ch)] && ch != EOF )
+ {
+ token += static_cast<char>(ch);
+ ch = streambuf->snextc();
+ }
+ }
+ else if ('0' <= ch && ch <= '9')
+ {
+ type = NUMBER;
+ token = static_cast<char>(ch);
+ ch = streambuf->sgetc();
+ while (('0' <= ch && ch <= '9') && ch != EOF)
+ {
+ token += static_cast<char>(ch);
+ ch = streambuf->snextc();
+ }
+ }
+ else
+ {
+ type = CHAR;
+ token = static_cast<char>(ch);
+ }
+ return;
+ } // switch (ch)
+ }
+
+ // if we get this far it means we have peeked so we should
+ // return the peek data.
+ type = next_type;
+ token = next_token;
+ have_peeked = false;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ int tokenizer_kernel_1::
+ peek_type (
+ ) const
+ {
+ const_cast<tokenizer_kernel_1*>(this)->get_token(next_type,next_token);
+ have_peeked = true;
+ return next_type;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ const std::string& tokenizer_kernel_1::
+ peek_token (
+ ) const
+ {
+ const_cast<tokenizer_kernel_1*>(this)->get_token(next_type,next_token);
+ have_peeked = true;
+ return next_token;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void tokenizer_kernel_1::
+ swap (
+ tokenizer_kernel_1& item
+ )
+ {
+ exchange(in,item.in);
+ exchange(streambuf,item.streambuf);
+ exchange(head,item.head);
+ exchange(body,item.body);
+ exchange(bodyset,item.bodyset);
+ exchange(headset,item.headset);
+ exchange(have_peeked,item.have_peeked);
+ exchange(next_type,item.next_type);
+ exchange(next_token,item.next_token);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ void tokenizer_kernel_1::
+ set_identifier_token (
+ const std::string& head_,
+ const std::string& body_
+ )
+ {
+ using namespace std;
+
+ head = head_;
+ body = body_;
+
+ for (unsigned long i = 0; i < UCHAR_MAX; ++i)
+ {
+ headset[i] = false;
+ bodyset[i] = false;
+ }
+
+ for (string::size_type i = 0; i < head.size(); ++i)
+ headset[static_cast<unsigned char>(head[i])] = true;
+ for (string::size_type i = 0; i < body.size(); ++i)
+ bodyset[static_cast<unsigned char>(body[i])] = true;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ const std::string tokenizer_kernel_1::
+ get_identifier_head (
+ ) const
+ {
+ return head;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ const std::string tokenizer_kernel_1::
+ get_identifier_body (
+ ) const
+ {
+ return body;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ const std::string tokenizer_kernel_1::
+ lowercase_letters (
+ ) const
+ {
+ return std::string("abcdefghijklmnopqrstuvwxyz");
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ const std::string tokenizer_kernel_1::
+ uppercase_letters (
+ ) const
+ {
+ return std::string("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ const std::string tokenizer_kernel_1::
+ numbers (
+ ) const
+ {
+ return std::string("0123456789");
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+#endif // DLIB_TOKENIZER_KERNEL_1_CPp_
+
diff --git a/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h b/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h
new file mode 100644
index 000000000..d67ae278f
--- /dev/null
+++ b/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h
@@ -0,0 +1,155 @@
+// Copyright (C) 2005 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_TOKENIZER_KERNEl_1_
+#define DLIB_TOKENIZER_KERNEl_1_
+
+#include <string>
+#include <iosfwd>
+#include <climits>
+#include "../algs.h"
+#include "tokenizer_kernel_abstract.h"
+
+namespace dlib
+{
+
    class tokenizer_kernel_1
    {
        /*!
            INITIAL VALUE
                - in == 0
                - streambuf == 0
                - have_peeked == false
                - head == "_" + lowercase_letters() + uppercase_letters()
                - body == "_" + lowercase_letters() + uppercase_letters() + numbers()
                - headset == pointer to an array of UCHAR_MAX bools and set according
                  to the CONVENTION.
                - bodyset == pointer to an array of UCHAR_MAX bools and set according
                  to the CONVENTION.

            CONVENTION
                - if (stream_is_set()) then
                    - get_stream() == *in
                    - streambuf == in->rdbuf()
                - else
                    - in == 0
                    - streambuf == 0

                - body == get_identifier_body()
                - head == get_identifier_head()

                - if (the char x appears in head) then
                    - headset[static_cast<unsigned char>(x)] == true
                - else
                    - headset[static_cast<unsigned char>(x)] == false

                - if (the char x appears in body) then
                    - bodyset[static_cast<unsigned char>(x)] == true
                - else
                    - bodyset[static_cast<unsigned char>(x)] == false

                - if (have_peeked) then
                    - next_token == the next token to be returned from get_token()
                    - next_type == the type of token in peek_token
        !*/

    public:

        // The name of this enum is irrelevant but on some compilers (gcc on MAC OS X) not having it named
        // causes an error for whatever reason
        enum some_random_name
        {
            END_OF_LINE,    // a single '\n' character
            END_OF_FILE,    // end of input; the token text is empty
            IDENTIFIER,     // a head character followed by body characters
            CHAR,           // any single character not part of another token
            NUMBER,         // a run of decimal digits
            WHITE_SPACE     // a run of ' ', '\t' and '\r' characters
        };

        tokenizer_kernel_1 (
        );

        virtual ~tokenizer_kernel_1 (
        );

        void clear(
        );

        void set_stream (
            std::istream& in
        );

        bool stream_is_set (
        ) const;

        std::istream& get_stream (
        ) const;

        void get_token (
            int& type,
            std::string& token
        );

        void swap (
            tokenizer_kernel_1& item
        );

        void set_identifier_token (
            const std::string& head,
            const std::string& body
        );

        int peek_type (
        ) const;

        const std::string& peek_token (
        ) const;

        const std::string get_identifier_head (
        ) const;

        const std::string get_identifier_body (
        ) const;

        const std::string lowercase_letters (
        ) const;

        const std::string uppercase_letters (
        ) const;

        const std::string numbers (
        ) const;

    private:

        // restricted functions: this object is non-copyable
        tokenizer_kernel_1(const tokenizer_kernel_1&);        // copy constructor
        tokenizer_kernel_1& operator=(const tokenizer_kernel_1&);    // assignment operator


        // data members
        std::istream* in;               // non-owning; 0 when no stream is set
        std::streambuf* streambuf;      // cached in->rdbuf(); 0 when no stream is set
        std::string head;               // characters that may start an IDENTIFIER
        std::string body;               // characters that may continue an IDENTIFIER
        bool* headset;                  // owned lookup table indexed by unsigned char
        bool* bodyset;                  // owned lookup table indexed by unsigned char

        // one-token peek buffer; mutable so the const peek functions can fill it
        mutable std::string next_token;
        mutable int next_type;
        mutable bool have_peeked;
    };
+
+ inline void swap (
+ tokenizer_kernel_1& a,
+ tokenizer_kernel_1& b
+ ) { a.swap(b); }
+
+}
+
+#ifdef NO_MAKEFILE
+#include "tokenizer_kernel_1.cpp"
+#endif
+
#endif // DLIB_TOKENIZER_KERNEl_1_
+
diff --git a/ml/dlib/dlib/tokenizer/tokenizer_kernel_abstract.h b/ml/dlib/dlib/tokenizer/tokenizer_kernel_abstract.h
new file mode 100644
index 000000000..f534b8f7f
--- /dev/null
+++ b/ml/dlib/dlib/tokenizer/tokenizer_kernel_abstract.h
@@ -0,0 +1,289 @@
+// Copyright (C) 2005 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_TOKENIZER_KERNEl_ABSTRACT_
+#ifdef DLIB_TOKENIZER_KERNEl_ABSTRACT_
+
+#include <string>
#include <iosfwd>
+
+namespace dlib
+{
+
    class tokenizer
    {
        /*!
            INITIAL VALUE
                stream_is_set() == false
                get_identifier_head() == "_" + lowercase_letters() + uppercase_letters()
                get_identifier_body() == "_" + lowercase_letters() + uppercase_letters() +
                                         numbers()

            WHAT THIS OBJECT REPRESENTS
                This object represents a simple tokenizer for textual data.

            BUFFERING
                This object is allowed to buffer data from the input stream.
                Thus if you clear it or switch streams (via calling set_stream())
                any buffered data will be lost.

            TOKENS
                When picking out tokens the tokenizer will always extract the
                longest token it can.  For example, if faced with the string
                "555" it will consider the three 5s to be a single NUMBER
                token not three smaller NUMBER tokens.

                Also note that no characters in the input stream are discarded.
                They will all be returned in the text of some token.
                Additionally, each character will never be returned more than once.
                This means that if you concatenated all returned tokens it would exactly
                reproduce the contents of the input stream.

                The tokens are defined as follows:

                END_OF_LINE
                    This is a single character token and is always the '\n'
                    character.

                END_OF_FILE
                    This token represents the end of file.  It doesn't have any
                    actual characters associated with it.

                IDENTIFIER
                    This is a multi-character token.  It is defined as a string that
                    begins with a character from get_identifier_head() and is
                    followed by any number of characters from get_identifier_body().

                NUMBER
                    This is a multi-character token.  It is defined as a sequence of
                    numbers.

                WHITE_SPACE
                    This is a multi character token.  It is defined as a sequence of
                    one or more spaces, carriage returns, and tabs.  I.e. It is
                    composed of characters from the following string " \r\t".

                CHAR
                    This is a single character token.  It matches anything that isn't
                    part of one of the above tokens.
        !*/

    public:

        enum
        {
            END_OF_LINE,
            END_OF_FILE,
            IDENTIFIER,
            CHAR,
            NUMBER,
            WHITE_SPACE
        };

        tokenizer (
        );
        /*!
            ensures
                - #*this is properly initialized
            throws
                - std::bad_alloc
        !*/

        virtual ~tokenizer (
        );
        /*!
            ensures
                - any resources associated with *this have been released
        !*/

        void clear(
        );
        /*!
            ensures
                - #*this has its initial value
            throws
                - std::bad_alloc
                    If this exception is thrown then #*this is unusable
                    until clear() is called and succeeds.
        !*/

        void set_stream (
            std::istream& in
        );
        /*!
            ensures
                - #*this will read data from in and tokenize it
                - #stream_is_set() == true
                - #get_stream() == in
        !*/

        bool stream_is_set (
        ) const;
        /*!
            ensures
                - returns true if a stream has been associated with *this by calling
                  set_stream()
        !*/

        std::istream& get_stream (
        ) const;
        /*!
            requires
                - stream_is_set() == true
            ensures
                - returns a reference to the istream object that *this is reading
                  from.
        !*/

        void get_token (
            int& type,
            std::string& token
        );
        /*!
            requires
                - stream_is_set() == true
            ensures
                - #token == the next token from the input stream get_stream()
                - #type == the type of the token in #token
            throws
                - bad_alloc
                    If this exception is thrown then the call to this function will
                    have no effect on *this but the values of #type and #token will be
                    undefined.  Additionally, some characters may have been read
                    from the stream get_stream() and lost.
        !*/

        int peek_type (
        ) const;
        /*!
            requires
                - stream_is_set() == true
            ensures
                - returns the type of the token that will be returned from
                  the next call to get_token()
            throws
                - bad_alloc
                    If this exception is thrown then the call to this function will
                    have no effect on *this.  However, some characters may have been
                    read from the stream get_stream() and lost.
        !*/

        const std::string& peek_token (
        ) const;
        /*!
            requires
                - stream_is_set() == true
            ensures
                - returns the text of the token that will be returned from
                  the next call to get_token()
            throws
                - bad_alloc
                    If this exception is thrown then the call to this function will
                    have no effect on *this.  However, some characters may have been
                    read from the stream get_stream() and lost.
        !*/

        void set_identifier_token (
            const std::string& head,
            const std::string& body
        );
        /*!
            requires
                - head.find_first_of(" \r\t\n0123456789") == std::string::npos
                  (i.e. head doesn't contain any characters from the string
                  " \r\t\n0123456789").
                - body.find_first_of(" \r\t\n") == std::string::npos
                  (i.e. body doesn't contain any characters from the string " \r\t\n").
            ensures
                - #get_identifier_head() == head
                - #get_identifier_body() == body
            throws
                - std::bad_alloc
                    If this exception is thrown then #*this is unusable
                    until clear() is called and succeeds.
        !*/

        const std::string get_identifier_head (
        ) const;
        /*!
            ensures
                - returns a string containing the characters that can be the start
                  of an IDENTIFIER token.
            throws
                - std::bad_alloc
                    If this exception is thrown then the call to this function
                    has no effect.
        !*/

        const std::string get_identifier_body (
        ) const;
        /*!
            ensures
                - returns a string containing the characters that can appear in the
                  body of an IDENTIFIER token.
            throws
                - std::bad_alloc
                    If this exception is thrown then the call to this function
                    has no effect.
        !*/

        const std::string lowercase_letters (
        ) const;
        /*!
            ensures
                - returns "abcdefghijklmnopqrstuvwxyz"
            throws
                - std::bad_alloc
                    If this exception is thrown then the call to this function
                    has no effect.
        !*/

        const std::string uppercase_letters (
        ) const;
        /*!
            ensures
                - returns "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            throws
                - std::bad_alloc
                    If this exception is thrown then the call to this function
                    has no effect.
        !*/

        const std::string numbers (
        ) const;
        /*!
            ensures
                - returns "0123456789"
            throws
                - std::bad_alloc
                    If this exception is thrown then the call to this function
                    has no effect.
        !*/

        void swap (
            tokenizer& item
        );
        /*!
            ensures
                - swaps *this and item
        !*/

    private:

        // restricted functions
        tokenizer(const tokenizer&);        // copy constructor
        tokenizer& operator=(const tokenizer&);    // assignment operator

    };
+
+ inline void swap (
+ tokenizer& a,
+ tokenizer& b
+ ) { a.swap(b); }
+ /*!
+ provides a global swap function
+ !*/
+
+}
+
+#endif // DLIB_TOKENIZER_KERNEl_ABSTRACT_
+
diff --git a/ml/dlib/dlib/tokenizer/tokenizer_kernel_c.h b/ml/dlib/dlib/tokenizer/tokenizer_kernel_c.h
new file mode 100644
index 000000000..f9604809d
--- /dev/null
+++ b/ml/dlib/dlib/tokenizer/tokenizer_kernel_c.h
@@ -0,0 +1,167 @@
+// Copyright (C) 2003 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_TOKENIZER_KERNEl_C_
+#define DLIB_TOKENIZER_KERNEl_C_
+
+#include "tokenizer_kernel_abstract.h"
+#include "../assert.h"
+#include <string>
+#include <iostream>
+
+namespace dlib
+{
+
    template <
        typename tokenizer
        >
    class tokenizer_kernel_c : public tokenizer
    {
        /*!
            This is the checking ("_c") wrapper for a tokenizer implementation.
            Each overridden function asserts its requires clause with
            DLIB_CASSERT and then forwards to the base class implementation.
        !*/

    public:
        std::istream& get_stream (
        ) const;

        void get_token (
            int& type,
            std::string& token
        );

        void set_identifier_token (
            const std::string& head,
            const std::string& body
        );

        int peek_type (
        ) const;

        const std::string& peek_token (
        ) const;
    };
+
+ template <
+ typename tokenizer
+ >
+ inline void swap (
+ tokenizer_kernel_c<tokenizer>& a,
+ tokenizer_kernel_c<tokenizer>& b
+ ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+ // member function definitions
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename tokenizer
+ >
+ void tokenizer_kernel_c<tokenizer>::
+ set_identifier_token (
+ const std::string& head,
+ const std::string& body
+ )
+ {
+ using namespace std;
+ // make sure requires clause is not broken
+ DLIB_CASSERT( head.find_first_of(" \r\t\n0123456789") == string::npos &&
+ body.find_first_of(" \r\t\n") == string::npos ,
+ "\tvoid tokenizer::set_identifier_token()"
+ << "\n\tyou can't define the IDENTIFIER token this way."
+ << "\n\thead: " << head
+ << "\n\tbody: " << body
+ << "\n\tthis: " << this
+ );
+
+ // call the real function
+ tokenizer::set_identifier_token(head,body);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename tokenizer
+ >
+ std::istream& tokenizer_kernel_c<tokenizer>::
+ get_stream (
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_CASSERT( this->stream_is_set() == true,
+ "\tstd::istream& tokenizer::get_stream()"
+ << "\n\tyou must set a stream for this object before you can get it"
+ << "\n\tthis: " << this
+ );
+
+ // call the real function
+ return tokenizer::get_stream();
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename tokenizer
+ >
+ int tokenizer_kernel_c<tokenizer>::
+ peek_type (
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_CASSERT( this->stream_is_set() == true,
+ "\tint tokenizer::peek_type()"
+ << "\n\tyou must set a stream for this object before you peek at what it contains"
+ << "\n\tthis: " << this
+ );
+
+ // call the real function
+ return tokenizer::peek_type();
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename tokenizer
+ >
+ const std::string& tokenizer_kernel_c<tokenizer>::
+ peek_token (
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_CASSERT( this->stream_is_set() == true,
+ "\tint tokenizer::peek_token()"
+ << "\n\tyou must set a stream for this object before you peek at what it contains"
+ << "\n\tthis: " << this
+ );
+
+ // call the real function
+ return tokenizer::peek_token();
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename tokenizer
+ >
+ void tokenizer_kernel_c<tokenizer>::
+ get_token (
+ int& type,
+ std::string& token
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_CASSERT( this->stream_is_set() == true,
+ "\tvoid tokenizer::get_token()"
+ << "\n\tyou must set a stream for this object before you can get tokens from it."
+ << "\n\tthis: " << this
+ );
+
+ // call the real function
+ tokenizer::get_token(type,token);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_TOKENIZER_KERNEl_C_
+
+