diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000 |
commit | 58daab21cd043e1dc37024a7f99b396788372918 (patch) | |
tree | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h | |
parent | Releasing debian version 1.43.2-1. (diff) | |
download | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip |
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h')
-rw-r--r-- | ml/dlib/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/ml/dlib/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h b/ml/dlib/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h new file mode 100644 index 000000000..e7ac23284 --- /dev/null +++ b/ml/dlib/dlib/cpp_tokenizer/cpp_tokenizer_kernel_abstract.h @@ -0,0 +1,224 @@ +// Copyright (C) 2005 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_CPP_TOKENIZER_KERNEl_ABSTRACT_ +#ifdef DLIB_CPP_TOKENIZER_KERNEl_ABSTRACT_ + +#include <string> +#include <ioswfd> + +namespace dlib +{ + + class cpp_tokenizer + { + /*! + INITIAL VALUE + stream_is_set() == false + + WHAT THIS OBJECT REPRESENTS + This object represents a simple tokenizer for C++ source code. + + BUFFERING + This object is allowed to buffer data from the input stream. + Thus if you clear it or switch streams (via calling set_stream()) + any buffered data will be lost. + + TOKENS + When picking out tokens the cpp_tokenizer will always extract the + longest token it can. For example, if faced with the string + "AAA" it will consider the three As to be a single IDENTIFIER + token not three smaller IDENTIFIER tokens. + + Also note that no characters in the input stream are discarded. + They will all be returned in the text of some token. + Additionally, each character will never be returned more than once. + This means that if you concatenated all returned tokens it would exactly + reproduce the contents of the input stream. + + The tokens are defined as follows: + + END_OF_FILE + This token represents the end of file. It doesn't have any + actual characters associated with it. + + KEYWORD + This token matches a C++ keyword. (This includes the preprocessor + directives). + + COMMENT + This token matches a C++ comment. + + SINGLE_QUOTED_TEXT + This token matches the text of any single quoted literal. + For example, 'a' would be a match and the text of this token + would be the single character a. + + DOUBLE_QUOTED_TEXT + This token matches the text of any double quoted string. + For example, "C++" would be a match and the text of this token + would be the three character string C++. + + WHITE_SPACE + This is a multi character token. It is defined as a sequence of + one or more spaces, carrage returns, newlines, and tabs. I.e. It + is composed of characters from the following string " \r\n\t". + + IDENTIFIER + This token matches any C++ identifier that isn't matched by any + of the above tokens. (A C++ identifier being a string matching + the regular expression [_$a-zA-Z][_$a-zA-Z0-9]*). + + NUMBER + This token matches any C++ numerical constant. + + OTHER + This matches anything that isn't part of one of the above tokens. + It is always a single character. + !*/ + + public: + + enum + { + END_OF_FILE, + KEYWORD, + COMMENT, + SINGLE_QUOTED_TEXT, + DOUBLE_QUOTED_TEXT, + IDENTIFIER, + OTHER, + NUMBER, + WHITE_SPACE + }; + + cpp_tokenizer ( + ); + /*! + ensures + - #*this is properly initialized + throws + - std::bad_alloc + !*/ + + virtual ~cpp_tokenizer ( + ); + /*! + ensures + - any resources associated with *this have been released + !*/ + + void clear( + ); + /*! + ensures + - #*this has its initial value + throws + - std::bad_alloc + If this exception is thrown then #*this is unusable + until clear() is called and succeeds. + !*/ + + void set_stream ( + std::istream& in + ); + /*! + ensures + - #*this will read data from in and tokenize it + - #stream_is_set() == true + - #get_stream() == in + !*/ + + bool stream_is_set ( + ) const; + /*! + ensures + - returns true if a stream has been associated with *this by calling + set_stream() + !*/ + + std::istream& get_stream ( + ) const; + /*! + requires + - stream_is_set() == true + ensures + - returns a reference to the istream object that *this is reading + from. + !*/ + + void get_token ( + int& type, + std::string& token + ); + /*! + requires + - stream_is_set() == true + ensures + - #token == the next token from the input stream get_stream() + - #type == the type of the token in #token + throws + - bad_alloc + If this exception is thrown then the call to this function will + have no effect on *this but the values of #type and #token will be + undefined. Additionally, some characters may have been read + from the stream get_stream() and lost. + !*/ + + int peek_type ( + ) const; + /*! + requires + - stream_is_set() == true + ensures + - returns the type of the token that will be returned from + the next call to get_token() + throws + - bad_alloc + If this exception is thrown then the call to this function will + have no effect on *this. However, some characters may have been + read from the stream get_stream() and lost. + !*/ + + const std::string& peek_token ( + ) const; + /*! + requires + - stream_is_set() == true + ensures + - returns the text of the token that will be returned from + the next call to get_token() + throws + - bad_alloc + If this exception is thrown then the call to this function will + have no effect on *this. However, some characters may have been + read from the stream get_stream() and lost. + !*/ + + void swap ( + cpp_tokenizer& item + ); + /*! + ensures + - swaps *this and item + !*/ + + private: + + // restricted functions + cpp_tokenizer(const cpp_tokenizer&); // copy constructor + cpp_tokenizer& operator=(const cpp_tokenizer&); // assignment operator + + }; + + inline void swap ( + cpp_tokenizer& a, + cpp_tokenizer& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + +} + +#endif // DLIB_CPP_TOKENIZER_KERNEl_ABSTRACT_ + |