diff options
Diffstat (limited to 'ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h')
-rw-r--r-- | ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h b/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h new file mode 100644 index 000000000..d67ae278f --- /dev/null +++ b/ml/dlib/dlib/tokenizer/tokenizer_kernel_1.h @@ -0,0 +1,155 @@ +// Copyright (C) 2005 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_TOKENIZER_KERNEl_1_ +#define DLIB_TOKENIZER_KERNEl_1_ + +#include <string> +#include <iosfwd> +#include <climits> +#include "../algs.h" +#include "tokenizer_kernel_abstract.h" + +namespace dlib +{ + + class tokenizer_kernel_1 + { + /*! + INITIAL VALUE + - in == 0 + - streambuf == 0 + - have_peeked == false + - head == "_" + lowercase_letters() + uppercase_letters() + - body == "_" + lowercase_letters() + uppercase_letters() + numbers() + - headset == pointer to an array of UCHAR_MAX bools and set according + to the CONVENTION. + - bodyset == pointer to an array of UCHAR_MAX bools and set according + to the CONVENTION. + + CONVENTION + - if (stream_is_set()) then + - get_stream() == *in + - streambuf == in->rdbuf() + - else + - in == 0 + - streambuf == 0 + + - body == get_identifier_body() + - head == get_identifier_head() + + - if (the char x appears in head) then + - headset[static_cast<unsigned char>(x)] == true + - else + - headset[static_cast<unsigned char>(x)] == false + + - if (the char x appears in body) then + - bodyset[static_cast<unsigned char>(x)] == true + - else + - bodyset[static_cast<unsigned char>(x)] == false + + - if (have_peeked) then + - next_token == the next token to be returned from get_token() + - next_type == the type of token in peek_token + !*/ + + public: + + // The name of this enum is irrelevant but on some compilers (gcc on MAC OS X) not having it named + // causes an error for whatever reason + enum some_random_name + { + END_OF_LINE, + END_OF_FILE, + IDENTIFIER, + CHAR, + NUMBER, + WHITE_SPACE + }; + + tokenizer_kernel_1 ( + ); + + virtual ~tokenizer_kernel_1 ( + ); + + void clear( + ); + + void set_stream ( + std::istream& in + ); + + bool stream_is_set ( + ) const; + + std::istream& get_stream ( + ) const; + + void get_token ( + int& type, + std::string& token + ); + + void swap ( + tokenizer_kernel_1& item + ); + + void set_identifier_token ( + const std::string& head, + const std::string& body + ); + + int peek_type ( + ) const; + + const std::string& peek_token ( + ) const; + + const std::string get_identifier_head ( + ) const; + + const std::string get_identifier_body ( + ) const; + + const std::string lowercase_letters ( + ) const; + + const std::string uppercase_letters ( + ) const; + + const std::string numbers ( + ) const; + + private: + + // restricted functions + tokenizer_kernel_1(const tokenizer_kernel_1&); // copy constructor + tokenizer_kernel_1& operator=(const tokenizer_kernel_1&); // assignment operator + + + // data members + std::istream* in; + std::streambuf* streambuf; + std::string head; + std::string body; + bool* headset; + bool* bodyset; + + mutable std::string next_token; + mutable int next_type; + mutable bool have_peeked; + }; + + inline void swap ( + tokenizer_kernel_1& a, + tokenizer_kernel_1& b + ) { a.swap(b); } + +} + +#ifdef NO_MAKEFILE +#include "tokenizer_kernel_1.cpp" +#endif + +#endif // DLIB_TOKENIZER_KERNEl_1 + |