diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:57:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 02:57:58 +0000 |
commit | be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch) | |
tree | 9754ff1ca740f6346cf8483ec915d4054bc5da2d /ml/dlib/tools/htmlify/to_xml.cpp | |
parent | Initial commit. (diff) | |
download | netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.tar.xz netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.zip |
Adding upstream version 1.44.3. upstream/1.44.3 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/tools/htmlify/to_xml.cpp')
-rw-r--r-- | ml/dlib/tools/htmlify/to_xml.cpp | 1599 |
1 files changed, 1599 insertions, 0 deletions
diff --git a/ml/dlib/tools/htmlify/to_xml.cpp b/ml/dlib/tools/htmlify/to_xml.cpp new file mode 100644 index 00000000..7fae4338 --- /dev/null +++ b/ml/dlib/tools/htmlify/to_xml.cpp @@ -0,0 +1,1599 @@ + +#include "to_xml.h" +#include "dlib/dir_nav.h" +#include <vector> +#include <sstream> +#include <iostream> +#include <fstream> +#include <stack> +#include "dlib/cpp_tokenizer.h" +#include "dlib/string.h" + +using namespace dlib; +using namespace std; + +// ---------------------------------------------------------------------------------------- + +typedef cpp_tokenizer::kernel_1a_c tok_type; + +// ---------------------------------------------------------------------------------------- + +class file_filter +{ +public: + + file_filter( + const string& filter + ) + { + // pick out the filter strings + istringstream sin(filter); + string temp; + sin >> temp; + while (sin) + { + endings.push_back("." + temp); + sin >> temp; + } + } + + bool operator() ( const file& f) const + { + // check if any of the endings match + for (unsigned long i = 0; i < endings.size(); ++i) + { + // if the ending is bigger than f's name then it obviously doesn't match + if (endings[i].size() > f.name().size()) + continue; + + // now check if the actual characters that make up the end of the file name + // matches what is in endings[i]. 
+ if ( std::equal(endings[i].begin(), endings[i].end(), f.name().end()-endings[i].size())) + return true; + } + + return false; + } + + std::vector<string> endings; +}; + +// ---------------------------------------------------------------------------------------- + +void obtain_list_of_files ( + const cmd_line_parser<char>::check_1a_c& parser, + const std::string& filter, + const unsigned long search_depth, + std::vector<std::pair<string,string> >& files +) +{ + for (unsigned long i = 0; i < parser.option("i").count(); ++i) + { + const directory dir(parser.option("i").argument(0,i)); + + const std::vector<file>& temp = get_files_in_directory_tree(dir, file_filter(filter), search_depth); + + // figure out how many characters need to be removed from the path of each file + const string parent = dir.get_parent().full_name(); + unsigned long strip = parent.size(); + if (parent.size() > 0 && parent[parent.size()-1] != '\\' && parent[parent.size()-1] != '/') + strip += 1; + + for (unsigned long i = 0; i < temp.size(); ++i) + { + files.push_back(make_pair(temp[i].full_name().substr(strip), temp[i].full_name())); + } + } + + for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) + { + files.push_back(make_pair(parser[i], parser[i])); + } + + std::sort(files.begin(), files.end()); +} + +// ---------------------------------------------------------------------------------------- + +struct tok_function_record +{ + std::vector<std::pair<int,string> > declaration; + string scope; + string file; + string comment; +}; + +struct tok_method_record +{ + std::vector<std::pair<int,string> > declaration; + string comment; +}; + +struct tok_variable_record +{ + std::vector<std::pair<int,string> > declaration; +}; + +struct tok_typedef_record +{ + std::vector<std::pair<int,string> > declaration; +}; + +struct tok_class_record +{ + std::vector<std::pair<int,string> > declaration; + string name; + string scope; + string file; + string comment; + + std::vector<tok_method_record> 
public_methods; + std::vector<tok_method_record> protected_methods; + std::vector<tok_variable_record> public_variables; + std::vector<tok_typedef_record> public_typedefs; + std::vector<tok_variable_record> protected_variables; + std::vector<tok_typedef_record> protected_typedefs; + std::vector<tok_class_record> public_inner_classes; + std::vector<tok_class_record> protected_inner_classes; +}; + +// ---------------------------------------------------------------------------------------- + +struct function_record +{ + string name; + string scope; + string declaration; + string file; + string comment; +}; + +struct method_record +{ + string name; + string declaration; + string comment; +}; + +struct variable_record +{ + string declaration; +}; + +struct typedef_record +{ + string declaration; +}; + +struct class_record +{ + string name; + string scope; + string declaration; + string file; + string comment; + + std::vector<method_record> public_methods; + std::vector<variable_record> public_variables; + std::vector<typedef_record> public_typedefs; + + std::vector<method_record> protected_methods; + std::vector<variable_record> protected_variables; + std::vector<typedef_record> protected_typedefs; + + std::vector<class_record> public_inner_classes; + std::vector<class_record> protected_inner_classes; +}; + +// ---------------------------------------------------------------------------------------- + +unsigned long count_newlines ( + const string& str +) +/*! + ensures + - returns the number of '\n' characters inside str +!*/ +{ + unsigned long count = 0; + for (unsigned long i = 0; i < str.size(); ++i) + { + if (str[i] == '\n') + ++count; + } + return count; +} + +// ---------------------------------------------------------------------------------------- + +bool contains_unescaped_newline ( + const string& str +) +/*! + ensures + - returns true if str contains a '\n' character that isn't preceded by a '\' + character. 
+!*/ +{ + if (str.size() == 0) + return false; + + if (str[0] == '\n') + return true; + + for (unsigned long i = 1; i < str.size(); ++i) + { + if (str[i] == '\n' && str[i-1] != '\\') + return true; + } + + return false; +} + +// ---------------------------------------------------------------------------------------- + +bool is_formal_comment ( + const string& str +) +{ + if (str.size() < 6) + return false; + + if (str[0] == '/' && + str[1] == '*' && + str[2] == '!' && + str[3] != 'P' && + str[3] != 'p' && + str[str.size()-3] == '!' && + str[str.size()-2] == '*' && + str[str.size()-1] == '/' ) + return true; + + return false; +} + +// ---------------------------------------------------------------------------------------- + +string make_scope_string ( + const std::vector<string>& namespaces, + unsigned long exclude_last_num_scopes = 0 +) +{ + string temp; + for (unsigned long i = 0; i + exclude_last_num_scopes < namespaces.size(); ++i) + { + if (namespaces[i].size() == 0) + continue; + + if (temp.size() == 0) + temp = namespaces[i]; + else + temp += "::" + namespaces[i]; + } + return temp; +} + +// ---------------------------------------------------------------------------------------- + +bool looks_like_function_declaration ( + const std::vector<std::pair<int,string> >& declaration +) +{ + + // Check if declaration contains IDENTIFIER ( ) somewhere in it. 
+ bool seen_first_part = false; + bool seen_operator = false; + int local_paren_count = 0; + for (unsigned long i = 1; i < declaration.size(); ++i) + { + if (declaration[i].first == tok_type::KEYWORD && + declaration[i].second == "operator") + { + seen_operator = true; + } + + if (declaration[i].first == tok_type::OTHER && + declaration[i].second == "(" && + (declaration[i-1].first == tok_type::IDENTIFIER || seen_operator)) + { + seen_first_part = true; + } + + if (declaration[i].first == tok_type::OTHER) + { + if ( declaration[i].second == "(") + ++local_paren_count; + else if ( declaration[i].second == ")") + --local_paren_count; + } + } + + if (seen_first_part && local_paren_count == 0) + return true; + else + return false; +} + +// ---------------------------------------------------------------------------------------- + +enum scope_type +{ + public_scope, + protected_scope, + private_scope +}; + + +void process_file ( + istream& fin, + const string& file, + std::vector<tok_function_record>& functions, + std::vector<tok_class_record>& classes +) +/*! + ensures + - scans the given file for global functions and appends any found into functions. + - scans the given file for global classes and appends any found into classes. 
+!*/ +{ + tok_type tok; + tok.set_stream(fin); + + bool recently_seen_struct_keyword = false; + // true if we have seen the struct keyword and + // we have not seen any identifiers or { characters + + string last_struct_name; + // the name of the last struct we have seen + + bool recently_seen_class_keyword = false; + // true if we have seen the class keyword and + // we have not seen any identifiers or { characters + + string last_class_name; + // the name of the last class we have seen + + bool recently_seen_namespace_keyword = false; + // true if we have seen the namespace keyword and + // we have not seen any identifiers or { characters + + string last_namespace_name; + // the name of the last namespace we have seen + + bool recently_seen_pound_define = false; + // true if we have seen a #define and haven't seen an unescaped newline + + bool recently_seen_preprocessor = false; + // true if we have seen a preprocessor statement and haven't seen an unescaped newline + + bool recently_seen_typedef = false; + // true if we have seen a typedef keyword and haven't seen a ; + + bool recently_seen_paren_0 = false; + // true if we have seen paren_count transition to zero but haven't yet seen a ; or { or + // a new line if recently_seen_pound_define is true. + + bool recently_seen_slots = false; + // true if we have seen the identifier "slots" at a zero scope but haven't seen any + // other identifiers or the ';' or ':' characters. + + bool recently_seen_closing_bracket = false; + // true if we have seen a } and haven't yet seen an IDENTIFIER or ; + + bool recently_seen_new_scope = false; + // true if we have seen the keywords class, namespace, struct, or extern and + // we have not seen the characters {, ), or ; since then + + bool at_top_of_new_scope = false; + // true if we have seen the { that started a new scope but haven't seen anything yet but WHITE_SPACE + + std::vector<string> namespaces; + // a stack to hold the names of the scopes we have entered. 
This is the classes, structs, and namespaces we enter. + namespaces.push_back(""); // this is the global namespace + + std::stack<scope_type> scope_access; + // If the stack isn't empty then we are inside a class or struct and the top value + // in the stack tells if we are in a public, protected, or private region. + + std::stack<unsigned long> scopes; // a stack to hold current and old scope counts + // the top of the stack counts the number of new scopes (i.e. unmatched { } we have entered + // since we were at a scope where functions can be defined. + // We also maintain the invariant that scopes.size() == namespaces.size() + scopes.push(0); + + std::stack<tok_class_record> class_stack; + // This is a stack where class_stack.top() == the incomplete class record for the class declaration we are + // currently in. + + unsigned long paren_count = 0; + // this is the number of ( we have seen minus the number of ) we have + // seen. + + std::vector<std::pair<int,string> > token_accum; + // Used to accumulate tokens for function and class declarations + + std::vector<std::pair<int,string> > last_full_declaration; + // Once we determine that token_accum has a full declaration in it we copy it into last_full_declaration. 
+ + int type; + string token; + + tok.get_token(type, token); + + while (type != tok_type::END_OF_FILE) + { + switch(type) + { + case tok_type::KEYWORD: // ------------------------------------------ + { + token_accum.push_back(make_pair(type,token)); + + if (token[0] == '#') + recently_seen_preprocessor = true; + + if (token == "class") + { + recently_seen_class_keyword = true; + recently_seen_new_scope = true; + } + else if (token == "struct") + { + recently_seen_struct_keyword = true; + recently_seen_new_scope = true; + } + else if (token == "namespace") + { + recently_seen_namespace_keyword = true; + recently_seen_new_scope = true; + } + else if (token == "extern") + { + recently_seen_new_scope = true; + } + else if (token == "#define") + { + recently_seen_pound_define = true; + } + else if (token == "typedef") + { + recently_seen_typedef = true; + } + else if (recently_seen_pound_define == false) + { + // eat white space + int temp_type; + string temp_token; + if (tok.peek_type() == tok_type::WHITE_SPACE) + tok.get_token(temp_type, temp_token); + + const bool next_is_colon = (tok.peek_type() == tok_type::OTHER && tok.peek_token() == ":"); + if (next_is_colon) + { + // eat the colon + tok.get_token(temp_type, temp_token); + + if (scope_access.size() > 0 && token == "public") + { + scope_access.top() = public_scope; + token_accum.clear(); + last_full_declaration.clear(); + } + else if (scope_access.size() > 0 && token == "protected") + { + scope_access.top() = protected_scope; + token_accum.clear(); + last_full_declaration.clear(); + } + else if (scope_access.size() > 0 && token == "private") + { + scope_access.top() = private_scope; + token_accum.clear(); + last_full_declaration.clear(); + } + } + } + + at_top_of_new_scope = false; + + }break; + + case tok_type::COMMENT: // ------------------------------------------ + { + if (scopes.top() == 0 && last_full_declaration.size() > 0 && is_formal_comment(token) && + paren_count == 0) + { + + // if we are inside a 
class or struct + if (scope_access.size() > 0) + { + // if we are looking at a comment at the top of a class + if (at_top_of_new_scope) + { + // push an entry for this class into the class_stack + tok_class_record temp; + temp.declaration = last_full_declaration; + temp.file = file; + temp.name = namespaces.back(); + temp.scope = make_scope_string(namespaces,1); + temp.comment = token; + class_stack.push(temp); + } + else if (scope_access.top() == public_scope || scope_access.top() == protected_scope) + { + // This should be a member function. + // Only do anything if the class that contains this member function is + // in the class_stack. + if (class_stack.size() > 0 && class_stack.top().name == namespaces.back() && + looks_like_function_declaration(last_full_declaration)) + { + tok_method_record temp; + + // Check if there is an initialization list inside the declaration and if there is + // then find out where the starting : is located so we can avoid including it in + // the output. 
+ unsigned long pos = last_full_declaration.size(); + long temp_paren_count = 0; + for (unsigned long i = 0; i < last_full_declaration.size(); ++i) + { + if (last_full_declaration[i].first == tok_type::OTHER) + { + if (last_full_declaration[i].second == "(") + ++temp_paren_count; + else if (last_full_declaration[i].second == ")") + --temp_paren_count; + else if (temp_paren_count == 0 && last_full_declaration[i].second == ":") + { + // if this is a :: then ignore it + if (i > 0 && last_full_declaration[i-1].second == ":") + continue; + else if (i+1 < last_full_declaration.size() && last_full_declaration[i+1].second == ":") + continue; + else + { + pos = i; + break; + } + } + } + } + + temp.declaration.assign(last_full_declaration.begin(), last_full_declaration.begin()+pos); + temp.comment = token; + if (scope_access.top() == public_scope) + class_stack.top().public_methods.push_back(temp); + else + class_stack.top().protected_methods.push_back(temp); + } + } + } + else + { + // we should be looking at a global declaration of some kind. 
+ if (looks_like_function_declaration(last_full_declaration)) + { + tok_function_record temp; + + // make sure we never include anything beyond the first closing ) + // if we are looking at a #defined function + unsigned long pos = last_full_declaration.size(); + if (last_full_declaration[0].second == "#define") + { + long temp_paren_count = 0; + for (unsigned long i = 0; i < last_full_declaration.size(); ++i) + { + if (last_full_declaration[i].first == tok_type::OTHER) + { + if (last_full_declaration[i].second == "(") + { + ++temp_paren_count; + } + else if (last_full_declaration[i].second == ")") + { + --temp_paren_count; + if (temp_paren_count == 0) + { + pos = i+1; + break; + } + } + } + } + } + + temp.declaration.assign(last_full_declaration.begin(), last_full_declaration.begin()+pos); + temp.file = file; + temp.scope = make_scope_string(namespaces); + temp.comment = token; + functions.push_back(temp); + } + } + + token_accum.clear(); + last_full_declaration.clear(); + } + + at_top_of_new_scope = false; + }break; + + case tok_type::IDENTIFIER: // ------------------------------------------ + { + if (recently_seen_class_keyword) + { + last_class_name = token; + last_struct_name.clear(); + last_namespace_name.clear(); + } + else if (recently_seen_struct_keyword) + { + last_struct_name = token; + last_class_name.clear(); + last_namespace_name.clear(); + } + else if (recently_seen_namespace_keyword) + { + last_namespace_name = token; + last_class_name.clear(); + last_struct_name.clear(); + } + + if (scopes.top() == 0 && token == "slots") + recently_seen_slots = true; + else + recently_seen_slots = false; + + recently_seen_class_keyword = false; + recently_seen_struct_keyword = false; + recently_seen_namespace_keyword = false; + recently_seen_closing_bracket = false; + at_top_of_new_scope = false; + + token_accum.push_back(make_pair(type,token)); + }break; + + case tok_type::OTHER: // ------------------------------------------ + { + switch(token[0]) + { + case '{': 
+ // if we are entering a new scope + if (recently_seen_new_scope) + { + scopes.push(0); + at_top_of_new_scope = true; + + // if we are entering a class + if (last_class_name.size() > 0) + { + scope_access.push(private_scope); + namespaces.push_back(last_class_name); + } + else if (last_struct_name.size() > 0) + { + scope_access.push(public_scope); + namespaces.push_back(last_struct_name); + } + else if (last_namespace_name.size() > 0) + { + namespaces.push_back(last_namespace_name); + } + else + { + namespaces.push_back(""); + } + } + else + { + scopes.top() += 1; + } + recently_seen_new_scope = false; + recently_seen_class_keyword = false; + recently_seen_struct_keyword = false; + recently_seen_namespace_keyword = false; + recently_seen_paren_0 = false; + + // a { at function scope is an end of a potential declaration + if (scopes.top() == 0) + { + // put token_accum into last_full_declaration + token_accum.swap(last_full_declaration); + } + token_accum.clear(); + break; + + case '}': + if (scopes.top() > 0) + { + scopes.top() -= 1; + } + else if (scopes.size() > 1) + { + scopes.pop(); + + if (scope_access.size() > 0) + scope_access.pop(); + + // If the scope we are leaving is the top class on the class_stack + // then we need to either pop it into its containing class or put it + // into the classes output vector. + if (class_stack.size() > 0 && namespaces.back() == class_stack.top().name) + { + // If this class is a inner_class of another then push it into the + // public_inner_classes or protected_inner_classes field of it's containing class. 
+ if (class_stack.size() > 1) + { + tok_class_record temp = class_stack.top(); + class_stack.pop(); + if (scope_access.size() > 0) + { + if (scope_access.top() == public_scope) + class_stack.top().public_inner_classes.push_back(temp); + else if (scope_access.top() == protected_scope) + class_stack.top().protected_inner_classes.push_back(temp); + } + } + else if (class_stack.size() > 0) + { + classes.push_back(class_stack.top()); + class_stack.pop(); + } + } + + namespaces.pop_back(); + last_full_declaration.clear(); + } + + token_accum.clear(); + recently_seen_closing_bracket = true; + at_top_of_new_scope = false; + break; + + case ';': + // a ; at function scope is an end of a potential declaration + if (scopes.top() == 0) + { + // put token_accum into last_full_declaration + token_accum.swap(last_full_declaration); + } + token_accum.clear(); + + // if we are inside the public area of a class and this ; might be the end + // of a typedef or variable declaration + if (scopes.top() == 0 && scope_access.size() > 0 && + (scope_access.top() == public_scope || scope_access.top() == protected_scope) && + recently_seen_closing_bracket == false) + { + if (recently_seen_typedef) + { + // This should be a typedef inside the public area of a class or struct: + // Only do anything if the class that contains this typedef is in the class_stack. + if (class_stack.size() > 0 && class_stack.top().name == namespaces.back()) + { + tok_typedef_record temp; + temp.declaration = last_full_declaration; + if (scope_access.top() == public_scope) + class_stack.top().public_typedefs.push_back(temp); + else + class_stack.top().protected_typedefs.push_back(temp); + } + + } + else if (recently_seen_paren_0 == false && recently_seen_new_scope == false) + { + // This should be some kind of public variable declaration inside a class or struct: + // Only do anything if the class that contains this member variable is in the class_stack. 
+ if (class_stack.size() > 0 && class_stack.top().name == namespaces.back()) + { + tok_variable_record temp; + temp.declaration = last_full_declaration; + if (scope_access.top() == public_scope) + class_stack.top().public_variables.push_back(temp); + else + class_stack.top().protected_variables.push_back(temp); + } + + } + } + + recently_seen_new_scope = false; + recently_seen_typedef = false; + recently_seen_paren_0 = false; + recently_seen_closing_bracket = false; + recently_seen_slots = false; + at_top_of_new_scope = false; + break; + + case ':': + token_accum.push_back(make_pair(type,token)); + if (recently_seen_slots) + { + token_accum.clear(); + last_full_declaration.clear(); + recently_seen_slots = false; + } + break; + + case '(': + ++paren_count; + token_accum.push_back(make_pair(type,token)); + at_top_of_new_scope = false; + break; + + case ')': + token_accum.push_back(make_pair(type,token)); + + --paren_count; + if (paren_count == 0) + { + recently_seen_paren_0 = true; + if (scopes.top() == 0) + { + last_full_declaration = token_accum; + } + } + + recently_seen_new_scope = false; + at_top_of_new_scope = false; + break; + + default: + token_accum.push_back(make_pair(type,token)); + at_top_of_new_scope = false; + break; + } + }break; + + + case tok_type::WHITE_SPACE: // ------------------------------------------ + { + if (recently_seen_pound_define) + { + if (contains_unescaped_newline(token)) + { + recently_seen_pound_define = false; + recently_seen_paren_0 = false; + recently_seen_preprocessor = false; + + // this is an end of a potential declaration + token_accum.swap(last_full_declaration); + token_accum.clear(); + } + } + + if (recently_seen_preprocessor) + { + if (contains_unescaped_newline(token)) + { + recently_seen_preprocessor = false; + + last_full_declaration.clear(); + token_accum.clear(); + } + } + }break; + + default: // ------------------------------------------ + { + token_accum.push_back(make_pair(type,token)); + at_top_of_new_scope = 
false; + }break; + } + + + tok.get_token(type, token); + } +} + +// ---------------------------------------------------------------------------------------- + +string get_function_name ( + const std::vector<std::pair<int,string> >& declaration +) +{ + string name; + + bool contains_operator = false; + unsigned long operator_pos = 0; + for (unsigned long i = 0; i < declaration.size(); ++i) + { + if (declaration[i].first == tok_type::KEYWORD && + declaration[i].second == "operator") + { + contains_operator = true; + operator_pos = i; + break; + } + } + + + // find the opening ( for the function + unsigned long paren_pos = 0; + long paren_count = 0; + for (long i = declaration.size()-1; i >= 0; --i) + { + if (declaration[i].first == tok_type::OTHER && + declaration[i].second == ")") + { + ++paren_count; + } + else if (declaration[i].first == tok_type::OTHER && + declaration[i].second == "(") + { + --paren_count; + if (paren_count == 0) + { + paren_pos = i; + break; + } + } + } + + + if (contains_operator) + { + name = declaration[operator_pos].second; + for (unsigned long i = operator_pos+1; i < paren_pos; ++i) + { + if (declaration[i].first == tok_type::IDENTIFIER || declaration[i].first == tok_type::KEYWORD) + { + name += " "; + } + + name += declaration[i].second; + } + } + else + { + // if this is a destructor then include the ~ + if (paren_pos > 1 && declaration[paren_pos-2].second == "~") + name = "~" + declaration[paren_pos-1].second; + else if (paren_pos > 0) + name = declaration[paren_pos-1].second; + + + } + + return name; +} + +// ---------------------------------------------------------------------------------------- + +string pretty_print_declaration ( + const std::vector<std::pair<int,string> >& decl +) +{ + string temp; + long angle_count = 0; + long paren_count = 0; + + if (decl.size() == 0) + return temp; + + temp = decl[0].second; + + + bool just_closed_template = false; + bool in_template = false; + bool last_was_scope_res = false; + bool 
seen_operator = false; + + if (temp == "operator") + seen_operator = true; + + for (unsigned long i = 1; i < decl.size(); ++i) + { + bool last_was_less_than = false; + if (decl[i-1].first == tok_type::OTHER && decl[i-1].second == "<") + last_was_less_than = true; + + + if (decl[i].first == tok_type::OTHER && decl[i].second == "<" && + (decl[i-1].second != "operator" && ((i>1 && decl[i-2].second != "operator") || decl[i-1].second != "<") )) + ++angle_count; + + if (decl[i-1].first == tok_type::KEYWORD && decl[i-1].second == "template" && + decl[i].first == tok_type::OTHER && decl[i].second == "<") + { + in_template = true; + temp += " <\n "; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == ">") + { + // don't count angle brackets when they are part of an operator + if (decl[i-1].second != "operator" && ((i>1 && decl[i-2].second != "operator") || decl[i-1].second != ">")) + --angle_count; + + if (angle_count == 0 && in_template) + { + temp += "\n >\n"; + just_closed_template = true; + in_template = false; + } + else + { + temp += ">"; + } + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == "<") + { + temp += "<"; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == ",") + { + if (in_template || (paren_count == 1 && angle_count == 0)) + temp += ",\n "; + else + temp += ","; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == "&") + { + temp += "&"; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == ".") + { + temp += "."; + } + else if (decl[i].first == tok_type::SINGLE_QUOTED_TEXT) + { + temp += decl[i].second; + } + else if (decl[i].first == tok_type::DOUBLE_QUOTED_TEXT) + { + temp += decl[i].second; + } + else if (decl[i-1].first == tok_type::SINGLE_QUOTED_TEXT && decl[i].second == "'") + { + temp += decl[i].second; + } + else if (decl[i-1].first == tok_type::DOUBLE_QUOTED_TEXT && decl[i].second == "\"") + { + temp += decl[i].second; + } + else if (decl[i].first == 
tok_type::OTHER && decl[i].second == "[") + { + temp += "["; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == "]") + { + temp += "]"; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == "-") + { + temp += "-"; + } + else if (decl[i].first == tok_type::NUMBER) + { + if (decl[i-1].second == "=") + temp += " " + decl[i].second; + else + temp += decl[i].second; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == "*") + { + temp += "*"; + } + else if (decl[i].first == tok_type::KEYWORD && decl[i].second == "operator") + { + temp += "\noperator"; + seen_operator = true; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == ":" && + (decl[i-1].second == ":" || (i+1<decl.size() && decl[i+1].second == ":") ) ) + { + temp += ":"; + last_was_scope_res = true; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == "(") + { + const bool next_is_paren = (i+1 < decl.size() && decl[i+1].first == tok_type::OTHER && decl[i+1].second == ")"); + + if (paren_count == 0 && next_is_paren == false && in_template == false) + temp += " (\n "; + else + temp += "("; + + ++paren_count; + } + else if (decl[i].first == tok_type::OTHER && decl[i].second == ")") + { + --paren_count; + if (paren_count == 0 && decl[i-1].second != "(" && in_template == false) + temp += "\n)"; + else + temp += ")"; + } + else if (decl[i].first == tok_type::IDENTIFIER && i+1 < decl.size() && + decl[i+1].first == tok_type::OTHER && decl[i+1].second == "(") + { + if (just_closed_template || paren_count != 0 || decl[i-1].second == "~") + temp += decl[i].second; + else if (seen_operator) + temp += " " + decl[i].second; + else + temp += "\n" + decl[i].second; + + just_closed_template = false; + last_was_scope_res = false; + } + else + { + if (just_closed_template || last_was_scope_res || last_was_less_than || + (seen_operator && paren_count == 0 && decl[i].first == tok_type::OTHER ) || + ((decl[i].first == tok_type::KEYWORD || decl[i].first 
== tok_type::IDENTIFIER) && i>0 && decl[i-1].second == "(")) + temp += decl[i].second; + else + temp += " " + decl[i].second; + + just_closed_template = false; + last_was_scope_res = false; + } + + + + } + + return temp; +} + +// ---------------------------------------------------------------------------------------- + +string format_comment ( + const string& comment, + const unsigned long expand_tabs +) +{ + if (comment.size() <= 6) + return ""; + + string temp = trim(trim(comment.substr(3,comment.size()-6), " \t"), "\n\r"); + + + // if we should expand tabs to spaces + if (expand_tabs != 0) + { + unsigned long column = 0; + string str; + for (unsigned long i = 0; i < temp.size(); ++i) + { + if (temp[i] == '\t') + { + const unsigned long num_spaces = expand_tabs - column%expand_tabs; + column += num_spaces; + str.insert(str.end(), num_spaces, ' '); + } + else if (temp[i] == '\n' || temp[i] == '\r') + { + str += temp[i]; + column = 0; + } + else + { + str += temp[i]; + ++column; + } + } + + // put str into temp + str.swap(temp); + } + + // now figure out what the smallest amount of leading white space is and remove it from each line. 
+ unsigned long num_whitespace = 100000; + + string::size_type pos1 = 0, pos2 = 0; + + while (pos1 != string::npos) + { + // find start of non-white-space + pos2 = temp.find_first_not_of(" \t",pos1); + + // if this is a line of just white space then ignore it + if (pos2 != string::npos && temp[pos2] != '\n' && temp[pos2] != '\r') + { + if (pos2-pos1 < num_whitespace) + num_whitespace = pos2-pos1; + } + + // find end-of-line + pos1 = temp.find_first_of("\n\r", pos2); + // find start of next line + pos2 = temp.find_first_not_of("\n\r", pos1); + pos1 = pos2; + } + + // now remove the leading white space + string temp2; + unsigned long counter = 0; + for (unsigned long i = 0; i < temp.size(); ++i) + { + // if we are looking at a new line + if (temp[i] == '\n' || temp[i] == '\r') + { + counter = 0; + } + else if (counter < num_whitespace) + { + ++counter; + continue; + } + + temp2 += temp[i]; + } + + return temp2; +} + +// ---------------------------------------------------------------------------------------- + +typedef_record convert_tok_typedef_record ( + const tok_typedef_record& rec +) +{ + typedef_record temp; + temp.declaration = pretty_print_declaration(rec.declaration); + return temp; +} + +// ---------------------------------------------------------------------------------------- + +variable_record convert_tok_variable_record ( + const tok_variable_record& rec +) +{ + variable_record temp; + temp.declaration = pretty_print_declaration(rec.declaration); + return temp; +} + +// ---------------------------------------------------------------------------------------- + +method_record convert_tok_method_record ( + const tok_method_record& rec, + const unsigned long expand_tabs +) +{ + method_record temp; + + temp.comment = format_comment(rec.comment, expand_tabs); + temp.name = get_function_name(rec.declaration); + temp.declaration = pretty_print_declaration(rec.declaration); + return temp; +} + +// 
---------------------------------------------------------------------------------------- + +class_record convert_tok_class_record ( + const tok_class_record& rec, + const unsigned long expand_tabs +) +{ + class_record crec; + + + crec.scope = rec.scope; + crec.file = rec.file; + crec.comment = format_comment(rec.comment, expand_tabs); + + crec.name.clear(); + + // find the first class token + for (unsigned long i = 0; i+1 < rec.declaration.size(); ++i) + { + if (rec.declaration[i].first == tok_type::KEYWORD && + (rec.declaration[i].second == "class" || + rec.declaration[i].second == "struct" ) + ) + { + crec.name = rec.declaration[i+1].second; + break; + } + } + + crec.declaration = pretty_print_declaration(rec.declaration); + + for (unsigned long i = 0; i < rec.public_typedefs.size(); ++i) + crec.public_typedefs.push_back(convert_tok_typedef_record(rec.public_typedefs[i])); + + for (unsigned long i = 0; i < rec.public_variables.size(); ++i) + crec.public_variables.push_back(convert_tok_variable_record(rec.public_variables[i])); + + for (unsigned long i = 0; i < rec.protected_typedefs.size(); ++i) + crec.protected_typedefs.push_back(convert_tok_typedef_record(rec.protected_typedefs[i])); + + for (unsigned long i = 0; i < rec.protected_variables.size(); ++i) + crec.protected_variables.push_back(convert_tok_variable_record(rec.protected_variables[i])); + + for (unsigned long i = 0; i < rec.public_methods.size(); ++i) + crec.public_methods.push_back(convert_tok_method_record(rec.public_methods[i], expand_tabs)); + + for (unsigned long i = 0; i < rec.protected_methods.size(); ++i) + crec.protected_methods.push_back(convert_tok_method_record(rec.protected_methods[i], expand_tabs)); + + for (unsigned long i = 0; i < rec.public_inner_classes.size(); ++i) + crec.public_inner_classes.push_back(convert_tok_class_record(rec.public_inner_classes[i], expand_tabs)); + + for (unsigned long i = 0; i < rec.protected_inner_classes.size(); ++i) + 
crec.protected_inner_classes.push_back(convert_tok_class_record(rec.protected_inner_classes[i], expand_tabs)); + + + return crec; +} + +// ---------------------------------------------------------------------------------------- + +function_record convert_tok_function_record ( + const tok_function_record& rec, + const unsigned long expand_tabs +) +{ + function_record temp; + + temp.scope = rec.scope; + temp.file = rec.file; + temp.comment = format_comment(rec.comment, expand_tabs); + temp.name = get_function_name(rec.declaration); + temp.declaration = pretty_print_declaration(rec.declaration); + + return temp; +} + +// ---------------------------------------------------------------------------------------- + +void convert_to_normal_records ( + const std::vector<tok_function_record>& tok_functions, + const std::vector<tok_class_record>& tok_classes, + const unsigned long expand_tabs, + std::vector<function_record>& functions, + std::vector<class_record>& classes +) +{ + functions.clear(); + classes.clear(); + + + for (unsigned long i = 0; i < tok_functions.size(); ++i) + { + functions.push_back(convert_tok_function_record(tok_functions[i], expand_tabs)); + } + + + for (unsigned long i = 0; i < tok_classes.size(); ++i) + { + classes.push_back(convert_tok_class_record(tok_classes[i], expand_tabs)); + } + + +} + +// ---------------------------------------------------------------------------------------- + +string add_entity_ref (const string& str) +{ + string temp; + for (unsigned long i = 0; i < str.size(); ++i) + { + if (str[i] == '&') + temp += "&"; + else if (str[i] == '<') + temp += "<"; + else if (str[i] == '>') + temp += ">"; + else + temp += str[i]; + } + return temp; +} + +// ---------------------------------------------------------------------------------------- + +string flip_slashes (string str) +{ + for (unsigned long i = 0; i < str.size(); ++i) + { + if (str[i] == '\\') + str[i] = '/'; + } + return str; +} + +// 
---------------------------------------------------------------------------------------- + +void write_as_xml ( + const function_record& rec, + ostream& fout +) +{ + fout << " <function>\n"; + fout << " <name>" << add_entity_ref(rec.name) << "</name>\n"; + fout << " <scope>" << add_entity_ref(rec.scope) << "</scope>\n"; + fout << " <declaration>" << add_entity_ref(rec.declaration) << "</declaration>\n"; + fout << " <file>" << flip_slashes(add_entity_ref(rec.file)) << "</file>\n"; + fout << " <comment>" << add_entity_ref(rec.comment) << "</comment>\n"; + fout << " </function>\n"; +} + +// ---------------------------------------------------------------------------------------- + +void write_as_xml ( + const class_record& rec, + ostream& fout, + unsigned long indent +) +{ + const string pad(indent, ' '); + + fout << pad << "<class>\n"; + fout << pad << " <name>" << add_entity_ref(rec.name) << "</name>\n"; + fout << pad << " <scope>" << add_entity_ref(rec.scope) << "</scope>\n"; + fout << pad << " <declaration>" << add_entity_ref(rec.declaration) << "</declaration>\n"; + fout << pad << " <file>" << flip_slashes(add_entity_ref(rec.file)) << "</file>\n"; + fout << pad << " <comment>" << add_entity_ref(rec.comment) << "</comment>\n"; + + + if (rec.public_typedefs.size() > 0) + { + fout << pad << " <public_typedefs>\n"; + for (unsigned long i = 0; i < rec.public_typedefs.size(); ++i) + { + fout << pad << " <typedef>" << add_entity_ref(rec.public_typedefs[i].declaration) << "</typedef>\n"; + } + fout << pad << " </public_typedefs>\n"; + } + + + if (rec.public_variables.size() > 0) + { + fout << pad << " <public_variables>\n"; + for (unsigned long i = 0; i < rec.public_variables.size(); ++i) + { + fout << pad << " <variable>" << add_entity_ref(rec.public_variables[i].declaration) << "</variable>\n"; + } + fout << pad << " </public_variables>\n"; + } + + if (rec.protected_typedefs.size() > 0) + { + fout << pad << " <protected_typedefs>\n"; + for (unsigned long i = 0; i < 
rec.protected_typedefs.size(); ++i) + { + fout << pad << " <typedef>" << add_entity_ref(rec.protected_typedefs[i].declaration) << "</typedef>\n"; + } + fout << pad << " </protected_typedefs>\n"; + } + + + if (rec.protected_variables.size() > 0) + { + fout << pad << " <protected_variables>\n"; + for (unsigned long i = 0; i < rec.protected_variables.size(); ++i) + { + fout << pad << " <variable>" << add_entity_ref(rec.protected_variables[i].declaration) << "</variable>\n"; + } + fout << pad << " </protected_variables>\n"; + } + + + if (rec.public_methods.size() > 0) + { + fout << pad << " <public_methods>\n"; + for (unsigned long i = 0; i < rec.public_methods.size(); ++i) + { + fout << pad << " <method>\n"; + fout << pad << " <name>" << add_entity_ref(rec.public_methods[i].name) << "</name>\n"; + fout << pad << " <declaration>" << add_entity_ref(rec.public_methods[i].declaration) << "</declaration>\n"; + fout << pad << " <comment>" << add_entity_ref(rec.public_methods[i].comment) << "</comment>\n"; + fout << pad << " </method>\n"; + } + fout << pad << " </public_methods>\n"; + } + + + if (rec.protected_methods.size() > 0) + { + fout << pad << " <protected_methods>\n"; + for (unsigned long i = 0; i < rec.protected_methods.size(); ++i) + { + fout << pad << " <method>\n"; + fout << pad << " <name>" << add_entity_ref(rec.protected_methods[i].name) << "</name>\n"; + fout << pad << " <declaration>" << add_entity_ref(rec.protected_methods[i].declaration) << "</declaration>\n"; + fout << pad << " <comment>" << add_entity_ref(rec.protected_methods[i].comment) << "</comment>\n"; + fout << pad << " </method>\n"; + } + fout << pad << " </protected_methods>\n"; + } + + + if (rec.public_inner_classes.size() > 0) + { + fout << pad << " <public_inner_classes>\n"; + for (unsigned long i = 0; i < rec.public_inner_classes.size(); ++i) + { + write_as_xml(rec.public_inner_classes[i], fout, indent+4); + } + fout << pad << " </public_inner_classes>\n"; + } + + if 
(rec.protected_inner_classes.size() > 0) + { + fout << pad << " <protected_inner_classes>\n"; + for (unsigned long i = 0; i < rec.protected_inner_classes.size(); ++i) + { + write_as_xml(rec.protected_inner_classes[i], fout, indent+4); + } + fout << pad << " </protected_inner_classes>\n"; + } + + + fout << pad << "</class>\n"; +} + +// ---------------------------------------------------------------------------------------- + +void save_to_xml_file ( + const std::vector<function_record>& functions, + const std::vector<class_record>& classes +) +{ + ofstream fout("output.xml"); + + fout << "<!-- This XML file was generated using the htmlify tool available from http://dlib.net. -->" << endl; + fout << "<code>" << endl; + + fout << " <classes>" << endl; + for (unsigned long i = 0; i < classes.size(); ++i) + { + write_as_xml(classes[i], fout, 4); + fout << "\n"; + } + fout << " </classes>\n\n" << endl; + + + fout << " <global_functions>" << endl; + for (unsigned long i = 0; i < functions.size(); ++i) + { + write_as_xml(functions[i], fout); + fout << "\n"; + } + fout << " </global_functions>" << endl; + + fout << "</code>" << endl; +} + +// ---------------------------------------------------------------------------------------- + +void generate_xml_markup( + const cmd_line_parser<char>::check_1a_c& parser, + const std::string& filter, + const unsigned long search_depth, + const unsigned long expand_tabs +) +{ + + // first figure out which files should be processed + std::vector<std::pair<string,string> > files; + obtain_list_of_files(parser, filter, search_depth, files); + + + std::vector<tok_function_record> tok_functions; + std::vector<tok_class_record> tok_classes; + + for (unsigned long i = 0; i < files.size(); ++i) + { + ifstream fin(files[i].second.c_str()); + if (!fin) + { + cerr << "Error opening file: " << files[i].second << endl; + return; + } + process_file(fin, files[i].first, tok_functions, tok_classes); + } + + std::vector<function_record> functions; + 
std::vector<class_record> classes; + + convert_to_normal_records(tok_functions, tok_classes, expand_tabs, functions, classes); + + save_to_xml_file(functions, classes); +} + +// ---------------------------------------------------------------------------------------- + |