diff options
Diffstat (limited to 'src/boost/tools/auto_index/src/file_scanning.cpp')
-rw-r--r-- | src/boost/tools/auto_index/src/file_scanning.cpp | 495 |
1 files changed, 495 insertions, 0 deletions
diff --git a/src/boost/tools/auto_index/src/file_scanning.cpp b/src/boost/tools/auto_index/src/file_scanning.cpp new file mode 100644 index 000000000..f6c2209c6 --- /dev/null +++ b/src/boost/tools/auto_index/src/file_scanning.cpp @@ -0,0 +1,495 @@ +// Copyright 2008 John Maddock +// +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "auto_index.hpp" + +bool need_defaults = true; + +void install_default_scanners() +{ + need_defaults = false; + // + // Set the default scanners if they're not defined already: + // + file_scanner s; + s.type = "class_name"; + if(file_scanner_set.find(s) == file_scanner_set.end()) + { + add_file_scanner( + "class_name", // Index type + // Header file scanner regex: + // possibly leading whitespace: + "^[[:space:]]*" + // possible template declaration: + "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" + // class or struct: + "(class|struct)[[:space:]]*" + // leading declspec macros etc: + "(" + "\\<\\w+\\>" + "(" + "[[:blank:]]*\\([^)]*\\)" + ")?" + "[[:space:]]*" + ")*" + // the class name + "(\\<\\w*\\>)[[:space:]]*" + // template specialisation parameters + "(<[^;:{]+>)?[[:space:]]*" + // terminate in { or : + "(\\{|:[^;\\{()]*\\{)", + + "(?:class|struct)[^;{]+\\\\<\\5\\\\>[^;{]+\\\\{", // Format string to create indexing regex. + "\\5", // Format string to create index term. + "", // Filter regex for section id's. + "" // Filter regex for filenames. + ); + } + + s.type = "typedef_name"; + if(file_scanner_set.find(s) == file_scanner_set.end()) + { + add_file_scanner( + "typedef_name", // Index type + "typedef[^;{}#]+?(\\w+)\\s*;", // scanner regex + "typedef[^;]+\\\\<\\1\\\\>\\\\s*;", // Format string to create indexing regex. + "\\1", // Format string to create index term. + "", // Filter regex for section id's. + "" // Filter regex for filenames. + ); + } + + s.type = "macro_name"; + if(file_scanner_set.find(s) == file_scanner_set.end()) + { + add_file_scanner( + "macro_name", // Index type + "^\\s*#\\s*define\\s+(\\w+)", // scanner regex + "\\\\<\\1\\\\>", // Format string to create indexing regex. + "\\1", // Format string to create index term. + "", // Filter regex for section id's. + "" // Filter regex for filenames. + ); + } + + s.type = "function_name"; + if(file_scanner_set.find(s) == file_scanner_set.end()) + { + add_file_scanner( + "function_name", // Index type + "\\w++(?:\\s*+<[^>]++>)?[\\s&*]+?(\\w+)\\s*(?:BOOST_[[:upper:]_]+\\s*)?\\([^;{}]*\\)\\s*[;{]", // scanner regex + "\\\\<\\\\w+\\\\>(?:\\\\s+<[^>]*>)*[\\\\s&*]+\\\\<\\1\\\\>\\\\s*\\\\([^;{]*\\\\)", // Format string to create indexing regex. + "\\1", // Format string to create index term. + "", // Filter regex for section id's. + "" // Filter regex for filenames. + ); + } +} + +// +// Helper to dump file contents into a std::string: +// +void load_file(std::string& s, std::istream& is) +{ + s.erase(); + if(is.bad()) return; + s.reserve(is.rdbuf()->in_avail()); + char c; + while(is.get(c)) + { + if(s.capacity() == s.size()) + s.reserve(s.capacity() * 3); + s.append(1, c); + } +} +// +// Helper to convert string from external source into valid XML: +// +std::string escape_to_xml(const std::string& in) +{ + std::string result; + for(std::string::size_type i = 0; i < in.size(); ++i) + { + switch(in[i]) + { + case '&': + result.append("&"); + break; + case '<': + result.append("<"); + break; + case '>': + result.append(">"); + break; + case '"': + result.append("""); + break; + default: + result.append(1, in[i]); + } + } + return result; +} +// +// Scan a source file for things to index: +// +void scan_file(const std::string& file) +{ + if(need_defaults) + install_default_scanners(); + if(verbose) + std::cout << "Scanning file... " << file << std::endl; + std::string text; + std::ifstream is(file.c_str()); + if(!is.peek() || !is.good()) + throw std::runtime_error(std::string("Unable to read from file: ") + file); + load_file(text, is); + + for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan) + { + bool need_debug = false; + if(!debug.empty() && regex_match(pscan->type, ::debug)) + { + need_debug = true; + std::cout << "Processing scanner " << pscan->type << " on file " << file << std::endl; + std::cout << "Scanner regex:" << pscan->scanner << std::endl; + std::cout << "Scanner formatter (search regex):" << pscan->format_string << std::endl; + std::cout << "Scanner formatter (index term):" << pscan->term_formatter << std::endl; + std::cout << "Scanner file name filter:" << pscan->file_name_filter << std::endl; + std::cout << "Scanner section id filter:" << pscan->section_filter << std::endl; + } + if(!pscan->file_name_filter.empty()) + { + if(!regex_match(file, pscan->file_name_filter)) + { + if(need_debug) + { + std::cout << "File failed to match file name filter, this file will be skipped..." << std::endl; + } + continue; // skip this file + } + } + if(verbose && !need_debug) + std::cout << "Scanning for type \"" << (*pscan).type << "\" ... " << std::endl; + boost::sregex_iterator i(text.begin(), text.end(), (*pscan).scanner), j; + while(i != j) + { + try + { + index_info info; + info.term = escape_to_xml(i->format(pscan->term_formatter)); + info.search_text = i->format(pscan->format_string); + info.category = pscan->type; + if(!pscan->section_filter.empty()) + info.search_id = pscan->section_filter; + std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info); + if(pos.second) + { + if(verbose || need_debug) + std::cout << "Indexing " << info.term << " as type " << info.category << std::endl; + if(need_debug) + std::cout << "Search regex will be: \"" << info.search_text << "\"" << + " ID constraint is: \"" << info.search_id << "\"" + << "Found text was: " << i->str() << std::endl; + if(pos.first->search_text != info.search_text) + { + // + // Merge the search terms: + // + const_cast<boost::regex&>(pos.first->search_text) = + "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")"; + } + if(pos.first->search_id != info.search_id) + { + // + // Merge the ID constraints: + // + const_cast<boost::regex&>(pos.first->search_id) = + "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")"; + } + } + } + catch(const boost::regex_error& e) + { + std::cerr << "Unable to create regular expression from found index term:\"" + << i->format(pscan->term_formatter) << "\" In file " << file << std::endl; + std::cerr << e.what() << std::endl; + } + catch(const std::exception& e) + { + std::cerr << "Unable to create index term:\"" + << i->format(pscan->term_formatter) << "\" In file " << file << std::endl; + std::cerr << e.what() << std::endl; + throw; + } + ++i; + } + } +} +// +// Scan a whole directory for files to search: +// +void scan_dir(const std::string& dir, const std::string& mask, bool recurse) +{ + using namespace boost::filesystem; + boost::regex e(mask); + directory_iterator i(dir), j; + + while(i != j) + { + if(regex_match(i->path().filename().string(), e)) + { + scan_file(i->path().string()); + } + else if(recurse && is_directory(i->status())) + { + scan_dir(i->path().string(), mask, recurse); + } + ++i; + } +} +// +// Remove quotes from a string: +// +std::string unquote(const std::string& s) +{ + std::string result(s); + if((s.size() >= 2) && (*s.begin() == '\"') && (*s.rbegin() == '\"')) + { + result.erase(result.begin()); + result.erase(result.end() - 1); + } + return result; +} +// +// Load and process a script file: +// +void process_script(const std::string& script) +{ + static const boost::regex comment_parser( + "\\s*(?:#.*)?$" + ); + static const boost::regex scan_parser( + "!scan[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*" + ); + static const boost::regex scan_dir_parser( + "!scan-path[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" + "[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" + "(?:" + "[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" + ")?\\s*" + ); + static const boost::regex entry_parser( + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" + "(?:" + "[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")" + "(?:" + "[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")" + "(?:" + "[[:space:]]+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")" + ")?" + ")?" + ")?" + "[[:space:]]*"); + static const boost::regex rewrite_parser( + "!(rewrite-name|rewrite-id)\\s+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*" + ); + static const boost::regex debug_parser( + "!debug\\s+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*" + ); + static const boost::regex define_scanner_parser( + "!define-scanner\\s+" + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // type, index 1 + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // scanner regex, index 2 + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // format string, index 3 + "([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // format string for name, index 4 + "(?:" + "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // id-filter, index 5 + "(?:" + "\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // filename-filter, index 6 + ")?" + ")?" + "\\s*" + ); + static const boost::regex error_parser("!.*"); + + if(verbose) + std::cout << "Processing script " << script << std::endl; + boost::smatch what; + std::string line; + std::ifstream is(script.c_str()); + if(is.bad() || !exists(boost::filesystem::path(script))) + { + throw std::runtime_error(std::string("Could not open script file: ") + script); + } + while(std::getline(is, line).good()) + { + if(regex_match(line, what, comment_parser)) + { + // Nothing to do here... + } + else if(regex_match(line, what, scan_parser)) + { + std::string f = unquote(what[1].str()); + if(!boost::filesystem::path(f).is_complete()) + { + if(prefix.size()) + { + boost::filesystem::path base(prefix); + base /= f; + f = base.string(); + } + else + { + boost::filesystem::path base(script); + base.remove_filename(); + base /= f; + f = base.string(); + } + } + if(!exists(boost::filesystem::path(f))) + throw std::runtime_error("Error the file requested for scanning does not exist: " + f); + scan_file(f); + } + else if(regex_match(line, what, debug_parser)) + { + debug = unquote(what[1].str()); + } + else if(regex_match(line, what, define_scanner_parser)) + { + add_file_scanner(unquote(what.str(1)), unquote(what.str(2)), unquote(what.str(3)), + unquote(what.str(4)), unquote(what.str(5)), unquote(what.str(6))); + } + else if(regex_match(line, what, scan_dir_parser)) + { + std::string d = unquote(what[1].str()); + std::string m = unquote(what[2].str()); + bool r = unquote(what[3].str()) == "true"; + if(!boost::filesystem::path(d).is_complete()) + { + if(prefix.size()) + { + boost::filesystem::path base(prefix); + base /= d; + d = base.string(); + } + else + { + boost::filesystem::path base(script); + base.remove_filename(); + base /= d; + d = base.string(); + } + } + if(verbose) + std::cout << "Scanning directory " << d << std::endl; + if(!exists(boost::filesystem::path(d))) + throw std::runtime_error("Error the path requested for scanning does not exist: " + d); + scan_dir(d, m, r); + } + else if(regex_match(line, what, rewrite_parser)) + { + bool id = what[1] == "rewrite-id"; + std::string a = unquote(what[2].str()); + std::string b = unquote(what[3].str()); + id_rewrite_list.push_back(id_rewrite_rule(a, b, id)); + } + else if(line.compare(0, 9, "!exclude ") == 0) + { + static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"); + boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j; + while(i != j) + { + index_info info; + info.term = escape_to_xml(unquote(*i)); + // Erase all entries that have a category in our scanner set, + // plus any entry with no category at all: + index_terms.erase(info); + for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan) + { + info.category = (*pscan).type; + index_terms.erase(info); + } + ++i; + } + } + else if(regex_match(line, error_parser)) + { + std::cerr << "Error: Unable to process line: " << line << std::endl; + } + else if(regex_match(line, what, entry_parser)) + { + try{ + // what[1] is the Index entry + // what[2] is the regex to search for (optional) + // what[3] is a section id that must be matched + // in order for the term to be indexed (optional) + // what[4] is the index category to place the term in (optional). + index_info info; + info.term = escape_to_xml(unquote(what.str(1))); + std::string s = unquote(what.str(2)); + if(s.size()) + info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl); + else + info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl); + + s = unquote(what.str(3)); + if(s.size()) + info.search_id = s; + if(what[4].matched) + info.category = unquote(what.str(4)); + std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info); + if(pos.second) + { + if(pos.first->search_text != info.search_text) + { + // + // Merge the search terms: + // + const_cast<boost::regex&>(pos.first->search_text) = + "(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")"; + } + if(pos.first->search_id != info.search_id) + { + // + // Merge the ID constraints: + // + const_cast<boost::regex&>(pos.first->search_id) = + "(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")"; + } + } + } + catch(const boost::regex_error&) + { + std::cerr << "Unable to process regular expression in script line:\n \"" + << line << "\"" << std::endl; + throw; + } + catch(const std::exception&) + { + std::cerr << "Unable to process script line:\n \"" + << line << "\"" << std::endl; + throw; + } + } + else + { + std::cerr << "Error: Unable to process line: " << line << std::endl; + } + } +} + |