// NOTE(review): the text this file was recovered from had lost everything that
// was written between angle brackets (standard header names, template
// arguments and the XML tags inside string literals).  Template arguments were
// restored from the way each object is used.  The standard headers are the ones
// that declare the names used below.  The XML tag names were reconstructed from
// the record/field names.  Confirm all three against version control.

#include "to_xml.h"
#include "dlib/dir_nav.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stack>
#include <string>
#include <vector>
#include "dlib/cpp_tokenizer.h"

using namespace dlib;
using namespace std;

// ----------------------------------------------------------------------------------------

typedef cpp_tokenizer::kernel_1a_c tok_type;

// ----------------------------------------------------------------------------------------

class file_filter
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            This is a predicate over dlib file objects.  It is constructed from a
            white space separated list of file extensions (written without the
            leading '.') and returns true for any file whose name ends with "."
            followed by one of those extensions.
    !*/
public:

    file_filter(
        const string& filter
    )
    {
        // pick out the filter strings
        istringstream sin(filter);
        string temp;
        sin >> temp;
        while (sin)
        {
            endings.push_back("." + temp);
            sin >> temp;
        }
    }

    bool operator() ( const file& f) const
    {
        // check if any of the endings match
        for (unsigned long i = 0; i < endings.size(); ++i)
        {
            // if the ending is bigger than f's name then it obviously doesn't match
            if (endings[i].size() > f.name().size())
                continue;

            // now check if the actual characters that make up the end of the file name
            // matches what is in endings[i].
            if ( std::equal(endings[i].begin(), endings[i].end(), f.name().end()-endings[i].size()))
                return true;
        }

        return false;
    }

    // each element is "." followed by one of the extensions given to the constructor
    std::vector<string> endings;
};

// ----------------------------------------------------------------------------------------

void obtain_list_of_files (
    const cmd_line_parser<char>::check_1a_c& parser,
    const std::string& filter,
    const unsigned long search_depth,
    std::vector<file>& files
)
/*!
    ensures
        - for every occurrence of the "i" option: adds to files every file found by
          get_files_in_directory_tree() in the directory given as the option's
          argument, using file_filter(filter) and search_depth.
        - adds every non-option command line argument to files as a file.
        - files is sorted before this function returns.
        - anything that was already in files is kept.
!*/
{
    for (unsigned long i = 0; i < parser.option("i").count(); ++i)
    {
        const std::vector<file>& temp = get_files_in_directory_tree(parser.option("i").argument(0,i), file_filter(filter), search_depth);
        files.insert(files.begin(), temp.begin(), temp.end());
    }

    for (unsigned long i = 0; i < parser.number_of_arguments(); ++i)
    {
        files.push_back(file(parser[i]));
    }

    std::sort(files.begin(), files.end());
}

// ----------------------------------------------------------------------------------------

// The tok_*_record types hold declarations as raw (token type, token text) pairs
// exactly as they were accumulated by process_file().

struct tok_function_record
{
    std::vector<std::pair<int,string> > declaration;
    string scope;
    string file;
    string comment;
};

struct tok_method_record
{
    std::vector<std::pair<int,string> > declaration;
    string comment;
};

struct tok_variable_record
{
    std::vector<std::pair<int,string> > declaration;
};

struct tok_typedef_record
{
    std::vector<std::pair<int,string> > declaration;
};

struct tok_class_record
{
    std::vector<std::pair<int,string> > declaration;
    string name;
    string scope;
    string file;
    string comment;

    std::vector<tok_method_record> public_methods;
    std::vector<tok_variable_record> public_variables;
    std::vector<tok_typedef_record> public_typedefs;
    std::vector<tok_class_record> public_subclasses;
};

// ----------------------------------------------------------------------------------------

// The *_record types are the string versions of the tok_*_record types.  They are
// produced by convert_to_normal_records() and consumed by write_as_xml().

struct function_record
{
    string name;
    string scope;
    string declaration;
    string file;
    string comment;
};

struct method_record
{
    string name;
    string declaration;
    string comment;
};

struct variable_record
{
    string declaration;
};

struct typedef_record
{
    string declaration;
};

struct class_record
{
    string name;
    string scope;
    string declaration;
    string file;
    string comment;

    std::vector<method_record> public_methods;
    std::vector<variable_record> public_variables;
    std::vector<typedef_record> public_typedefs;
    std::vector<class_record> public_subclasses;
};

// ----------------------------------------------------------------------------------------

unsigned long count_newlines (
    const string& str
)
/*!
    ensures
        - returns the number of '\n' characters inside str
!*/
{
    unsigned long count = 0;
    for (unsigned long i = 0; i < str.size(); ++i)
    {
        if (str[i] == '\n')
            ++count;
    }
    return count;
}

// ----------------------------------------------------------------------------------------

bool contains_unescaped_newline (
    const string& str
)
/*!
    ensures
        - returns true if str contains a '\n' character that isn't preceded by a '\' character.
!*/
{
    if (str.size() == 0)
        return false;

    // a newline in the first position has nothing in front of it that could escape it
    if (str[0] == '\n')
        return true;

    for (unsigned long i = 1; i < str.size(); ++i)
    {
        if (str[i] == '\n' && str[i-1] != '\\')
            return true;
    }

    return false;
}

// ----------------------------------------------------------------------------------------

// ensures
//     - returns true if str has at least 6 characters, begins with the three
//       characters  / * !  where the fourth character is neither 'P' nor 'p',
//       and ends with the three characters  ! * /
//     - returns false otherwise
bool is_formal_comment (
    const string& str
)
{
    // needs room for the 3 character opener and the 3 character closer
    if (str.size() < 6)
        return false;

    if (str[0] == '/' &&
        str[1] == '*' &&
        str[2] == '!' &&
        str[3] != 'P' &&
        str[3] != 'p' &&
        str[str.size()-3] == '!' &&
        str[str.size()-2] == '*' &&
        str[str.size()-1] == '/' )
        return true;

    return false;
}

// ----------------------------------------------------------------------------------------

string make_scope_string (
    const std::vector<string>& namespaces,
    unsigned long exclude_last_num_scopes = 0
)
/*!
    ensures
        - returns the non-empty elements of namespaces joined together with "::",
          in order, ignoring the last exclude_last_num_scopes elements.
        - returns an empty string if there is nothing to join.
!*/
{
    string temp;
    for (unsigned long i = 0; i + exclude_last_num_scopes < namespaces.size(); ++i)
    {
        // empty names (e.g. the global namespace) don't contribute anything
        if (namespaces[i].size() == 0)
            continue;

        if (temp.size() == 0)
            temp = namespaces[i];
        else
            temp += "::" + namespaces[i];
    }
    return temp;
}

// ----------------------------------------------------------------------------------------

bool looks_like_function_declaration (
    const std::vector<std::pair<int,string> >& declaration
)
/*!
    ensures
        - returns true if declaration contains a "(" token that either directly
          follows an IDENTIFIER token or comes after the operator keyword, and the
          "(" and ")" tokens from the second token onward are balanced.
        - returns false otherwise.
!*/
{
    // Check if declaration contains IDENTIFIER ( ) somewhere in it.
    bool seen_first_part = false;
    bool seen_operator = false;
    int local_paren_count = 0;
    // start at 1 because each step looks back at the previous token
    for (unsigned long i = 1; i < declaration.size(); ++i)
    {
        if (declaration[i].first == tok_type::KEYWORD &&
            declaration[i].second == "operator")
        {
            seen_operator = true;
        }

        if (declaration[i].first == tok_type::OTHER &&
            declaration[i].second == "(" &&
            (declaration[i-1].first == tok_type::IDENTIFIER || seen_operator))
        {
            seen_first_part = true;
        }

        if (declaration[i].first == tok_type::OTHER)
        {
            if ( declaration[i].second == "(")
                ++local_paren_count;
            else if ( declaration[i].second == ")")
                --local_paren_count;
        }
    }

    if (seen_first_part && local_paren_count == 0)
        return true;
    else
        return false;
}

// ----------------------------------------------------------------------------------------

void process_file (
    const string& file,
    std::vector<tok_function_record>& functions,
    std::vector<tok_class_record>& classes
)
/*!
    ensures
        - scans the given file for global functions and appends any found into functions.
        - scans the given file for global classes and appends any found into classes.
!*/
{
    tok_type tok;
    ifstream fin(file.c_str());
    tok.set_stream(fin);

    bool recently_seen_struct_keyword = false;
        // true if we have seen the struct keyword and
        // we have not seen any identifiers or { characters

    string last_struct_name;
        // the name of the last struct we have seen

    bool recently_seen_class_keyword = false;
        // true if we have seen the class keyword and
        // we have not seen any identifiers or { characters

    string last_class_name;
        // the name of the last class we have seen

    bool recently_seen_namespace_keyword = false;
        // true if we have seen the namespace keyword and
        // we have not seen any identifiers or { characters

    string last_namespace_name;
        // the name of the last namespace we have seen

    bool recently_seen_pound_define = false;
        // true if we have seen a #define and haven't seen an unescaped newline

    bool recently_seen_typedef = false;
        // true if we have seen a typedef keyword and haven't seen a ;

    bool recently_seen_paren_0 = false;
        // true if we have seen paren_count transition to zero but haven't yet seen a ; or { or
        // a new line if recently_seen_pound_define is true.

    bool recently_seen_closing_bracket = false;
        // true if we have seen a } and haven't yet seen an IDENTIFIER or ;

    bool recently_seen_new_scope = false;
        // true if we have seen the keywords class, namespace, struct, or extern and
        // we have not seen the characters {, ), or ; since then

    bool at_top_of_new_scope = false;
        // true if we have seen the { that started a new scope but haven't seen anything yet but WHITE_SPACE

    std::vector<string> namespaces;
        // a stack to hold the names of the scopes we have entered.  This is the classes, structs, and namespaces we enter.
    namespaces.push_back(""); // this is the global namespace

    std::stack<bool> inside_public_scope;
        // If the stack isn't empty then we are inside a class or struct and the top value
        // in the stack tells if we are in a public region.

    std::stack<unsigned long> scopes;
        // a stack to hold current and old scope counts
        // the top of the stack counts the number of new scopes (i.e. unmatched { ) we have entered
        // since we were at a scope where functions can be defined.
        // We also maintain the invariant that scopes.size() == namespaces.size()
    scopes.push(0);

    std::stack<tok_class_record> class_stack;
        // This is a stack where class_stack.top() == the incomplete class record for the class declaration we are
        // currently in.

    unsigned long paren_count = 0;
        // this is the number of ( we have seen minus the number of ) we have
        // seen.

    std::vector<std::pair<int,string> > token_accum;
        // Used to accumulate tokens for function and class declarations

    std::vector<std::pair<int,string> > last_full_declaration;
        // Once we determine that token_accum has a full declaration in it we copy it into last_full_declaration.

    int type;
    string token;

    tok.get_token(type, token);

    while (type != tok_type::END_OF_FILE)
    {
        switch(type)
        {
            case tok_type::KEYWORD: // ------------------------------------------
                {
                    token_accum.push_back(make_pair(type,token));

                    if (token == "class")
                    {
                        recently_seen_class_keyword = true;
                        recently_seen_new_scope = true;
                    }
                    else if (token == "struct")
                    {
                        recently_seen_struct_keyword = true;
                        recently_seen_new_scope = true;
                    }
                    else if (token == "namespace")
                    {
                        cout << "hit namespace" << endl;
                        recently_seen_namespace_keyword = true;
                        recently_seen_new_scope = true;
                    }
                    else if (token == "extern")
                    {
                        recently_seen_new_scope = true;
                    }
                    else if (token == "#define")
                    {
                        recently_seen_pound_define = true;
                    }
                    else if (token == "typedef")
                    {
                        recently_seen_typedef = true;
                    }
                    else if (recently_seen_pound_define == false)
                    {
                        // eat white space
                        int temp_type;
                        string temp_token;
                        if (tok.peek_type() == tok_type::WHITE_SPACE)
                            tok.get_token(temp_type, temp_token);

                        const bool next_is_colon = (tok.peek_type() == tok_type::OTHER && tok.peek_token() == ":");
                        if (next_is_colon)
                        {
                            // eat the colon
                            tok.get_token(temp_type, temp_token);

                            if (inside_public_scope.size() > 0 && token == "public")
                            {
                                inside_public_scope.top() = true;
                                token_accum.clear();
                                last_full_declaration.clear();
                            }
                            else if (inside_public_scope.size() > 0 && token == "protected")
                            {
                                // protected members are recorded the same way public ones are
                                inside_public_scope.top() = true;
                                token_accum.clear();
                                last_full_declaration.clear();
                            }
                            else if (inside_public_scope.size() > 0 && token == "private")
                            {
                                inside_public_scope.top() = false;
                                token_accum.clear();
                                last_full_declaration.clear();
                            }
                        }
                    }

                    at_top_of_new_scope = false;

                }break;

            case tok_type::COMMENT: // ------------------------------------------
                {
                    if (scopes.top() == 0 && last_full_declaration.size() > 0 && is_formal_comment(token) &&
                        paren_count == 0)
                    {
                        // if we are inside a class or struct
                        if (inside_public_scope.size() > 0)
                        {
                            // if we are looking at a comment at the top of a class
                            if (at_top_of_new_scope)
                            {
                                // push an entry for this class into the class_stack
                                tok_class_record temp;
                                temp.declaration = last_full_declaration;
                                temp.file = file;
                                temp.name = namespaces.back();
                                temp.scope = make_scope_string(namespaces,1);
                                temp.comment = token;
                                class_stack.push(temp);
                            }
                            else if (inside_public_scope.top())
                            {
                                // This should be a member function.
                                // Only do anything if the class that contains this member function is
                                // in the class_stack.
                                if (class_stack.size() > 0 && class_stack.top().name == namespaces.back() &&
                                    looks_like_function_declaration(last_full_declaration))
                                {
                                    tok_method_record temp;
                                    temp.declaration = last_full_declaration;
                                    temp.comment = token;
                                    class_stack.top().public_methods.push_back(temp);
                                }
                            }
                        }
                        else
                        {
                            // we should be looking at a global declaration of some kind.
                            if (looks_like_function_declaration(last_full_declaration))
                            {
                                tok_function_record temp;
                                temp.declaration = last_full_declaration;
                                temp.file = file;
                                temp.scope = make_scope_string(namespaces);
                                temp.comment = token;
                                functions.push_back(temp);
                            }
                        }

                        token_accum.clear();
                        last_full_declaration.clear();
                    }

                    at_top_of_new_scope = false;

                }break;

            case tok_type::IDENTIFIER: // ------------------------------------------
                {
                    if (recently_seen_class_keyword)
                    {
                        last_class_name = token;
                        last_struct_name.clear();
                        last_namespace_name.clear();
                    }
                    else if (recently_seen_struct_keyword)
                    {
                        last_struct_name = token;
                        last_class_name.clear();
                        last_namespace_name.clear();
                    }
                    else if (recently_seen_namespace_keyword)
                    {
                        last_namespace_name = token;
                        last_class_name.clear();
                        last_struct_name.clear();
                    }

                    recently_seen_class_keyword = false;
                    recently_seen_struct_keyword = false;
                    recently_seen_namespace_keyword = false;
                    recently_seen_closing_bracket = false;
                    at_top_of_new_scope = false;

                    token_accum.push_back(make_pair(type,token));
                }break;

            case tok_type::OTHER: // ------------------------------------------
                {
                    switch(token[0])
                    {
                        case '{':
                            // if we are entering a new scope
                            if (recently_seen_new_scope)
                            {
                                cout << "new scope" << endl;
                                scopes.push(0);
                                at_top_of_new_scope = true;

                                // if we are entering a class
                                if (last_class_name.size() > 0)
                                {
                                    // members of a class start out private
                                    inside_public_scope.push(false);
                                    namespaces.push_back(last_class_name);
                                }
                                else if (last_struct_name.size() > 0)
                                {
                                    // members of a struct start out public
                                    inside_public_scope.push(true);
                                    namespaces.push_back(last_struct_name);
                                }
                                else if (last_namespace_name.size() > 0)
                                {
                                    namespaces.push_back(last_namespace_name);
                                }
                                else
                                {
                                    namespaces.push_back("");
                                }
                            }
                            else
                            {
                                scopes.top() += 1;
                            }
                            recently_seen_new_scope = false;
                            recently_seen_class_keyword = false;
                            recently_seen_struct_keyword = false;
                            recently_seen_namespace_keyword = false;
                            recently_seen_paren_0 = false;

                            // a { at function scope is an end of a potential declaration
                            if (scopes.top() == 0)
                            {
                                // put token_accum into last_full_declaration
                                token_accum.swap(last_full_declaration);
                            }
                            token_accum.clear();
                            break;

                        case '}':
                            if (scopes.top() > 0)
                            {
                                scopes.top() -= 1;
                            }
                            else if (scopes.size() > 1)
                            {
                                scopes.pop();
                                namespaces.pop_back();
                                if (inside_public_scope.size() > 0)
                                    inside_public_scope.pop();

                                // if this class is a subclass of another then push it into the
                                // public_subclasses field of it's containing class
                                if (class_stack.size() > 1)
                                {
                                    tok_class_record temp = class_stack.top();
                                    class_stack.pop();
                                    class_stack.top().public_subclasses.push_back(temp);
                                }
                                else if (class_stack.size() > 0)
                                {
                                    classes.push_back(class_stack.top());
                                    class_stack.pop();
                                }
                            }

                            token_accum.clear();
                            recently_seen_closing_bracket = true;
                            at_top_of_new_scope = false;
                            break;

                        case ';':
                            // a ; at function scope is an end of a potential declaration
                            if (scopes.top() == 0)
                            {
                                // put token_accum into last_full_declaration
                                token_accum.swap(last_full_declaration);
                            }
                            token_accum.clear();

                            // if we are inside the public area of a class and this ; might be the end
                            // of a typedef or variable declaration
                            if (scopes.top() == 0 && inside_public_scope.size() > 0 &&
                                inside_public_scope.top() == true &&
                                recently_seen_closing_bracket == false)
                            {
                                if (recently_seen_typedef)
                                {
                                    // This should be a typedef inside the public area of a class or struct:
                                    // Only do anything if the class that contains this typedef is in the class_stack.
                                    if (class_stack.size() > 0 && class_stack.top().name == namespaces.back())
                                    {
                                        tok_typedef_record temp;
                                        temp.declaration = last_full_declaration;
                                        class_stack.top().public_typedefs.push_back(temp);
                                    }
                                }
                                else if (recently_seen_paren_0 == false && recently_seen_new_scope == false)
                                {
                                    // This should be some kind of public variable declaration inside a class or struct:
                                    // Only do anything if the class that contains this member variable is in the class_stack.
                                    if (class_stack.size() > 0 && class_stack.top().name == namespaces.back())
                                    {
                                        tok_variable_record temp;
                                        temp.declaration = last_full_declaration;
                                        class_stack.top().public_variables.push_back(temp);
                                    }
                                }
                            }

                            recently_seen_new_scope = false;
                            recently_seen_typedef = false;
                            recently_seen_paren_0 = false;
                            recently_seen_closing_bracket = false;
                            at_top_of_new_scope = false;
                            break;

                        case '(':
                            ++paren_count;
                            token_accum.push_back(make_pair(type,token));
                            at_top_of_new_scope = false;
                            break;

                        case ')':
                            token_accum.push_back(make_pair(type,token));

                            // paren_count is unsigned.  Decrementing it on an unmatched )
                            // would wrap it around and leave the count off by one for the
                            // rest of the file, so an unmatched ) is simply not counted.
                            if (paren_count > 0)
                            {
                                --paren_count;
                                if (paren_count == 0)
                                {
                                    recently_seen_paren_0 = true;
                                    last_full_declaration = token_accum;
                                }
                            }

                            recently_seen_new_scope = false;
                            at_top_of_new_scope = false;
                            break;

                        default:
                            token_accum.push_back(make_pair(type,token));
                            at_top_of_new_scope = false;
                            break;
                    }
                }break;

            case tok_type::WHITE_SPACE: // ------------------------------------------
                {
                    if (recently_seen_pound_define)
                    {
                        if (contains_unescaped_newline(token))
                        {
                            recently_seen_pound_define = false;
                            recently_seen_paren_0 = false;

                            // this is an end of a potential declaration
                            token_accum.swap(last_full_declaration);
                            token_accum.clear();
                        }
                    }
                }break;

            default: // ------------------------------------------
                {
                    token_accum.push_back(make_pair(type,token));
                    at_top_of_new_scope = false;
                }break;
        }

        tok.get_token(type, token);
    }
}

// ----------------------------------------------------------------------------------------

string get_function_name (
    const std::vector<std::pair<int,string> >& declaration
)
/*!
    ensures
        - if the operator keyword doesn't appear before the first "(" token then
          returns the text of the token directly in front of that "(" (or an empty
          string if the "(" is the first token).
        - if the operator keyword does appear then returns the text of all the tokens
          starting with operator and ending just before the first "(" that doesn't
          directly follow the operator keyword.  A single space is inserted in front
          of each IDENTIFIER or KEYWORD token that isn't the first one appended.
        - returns an empty string if declaration contains neither of the above.
!*/
{
    string name;

    bool last_was_operator = false;
    bool seen_operator = false;
    for (unsigned long i = 0; i < declaration.size(); ++i)
    {
        // a ( right after the operator keyword is part of the name (i.e. operator())
        // so it doesn't end the search.
        if (declaration[i].first == tok_type::OTHER &&
            declaration[i].second == "(" &&
            !last_was_operator )
        {
            if (i != 0 && !seen_operator)
            {
                name = declaration[i-1].second;
            }
            break;
        }

        if (declaration[i].first == tok_type::KEYWORD &&
            declaration[i].second == "operator")
        {
            last_was_operator = true;
            seen_operator = true;
        }
        else
        {
            last_was_operator = false;
        }

        if (seen_operator)
        {
            if (name.size() != 0 &&
                (declaration[i].first == tok_type::IDENTIFIER || declaration[i].first == tok_type::KEYWORD) )
            {
                name += " ";
            }

            name += declaration[i].second;
        }
    }

    return name;
}

// ----------------------------------------------------------------------------------------

typedef_record convert_tok_typedef_record (
    const tok_typedef_record& rec
)
/*!
    ensures
        - returns a typedef_record whose declaration is the text of every token in
          rec.declaration, each followed by a single space.
!*/
{
    typedef_record temp;
    for (unsigned long i = 0; i < rec.declaration.size(); ++i)
    {
        temp.declaration += rec.declaration[i].second + " ";
    }
    return temp;
}

// ----------------------------------------------------------------------------------------

variable_record convert_tok_variable_record (
    const tok_variable_record& rec
)
/*!
    ensures
        - returns a variable_record whose declaration is the text of every token in
          rec.declaration, each followed by a single space.
!*/
{
    variable_record temp;
    for (unsigned long i = 0; i < rec.declaration.size(); ++i)
    {
        temp.declaration += rec.declaration[i].second + " ";
    }
    return temp;
}

// ----------------------------------------------------------------------------------------

method_record convert_tok_method_record (
    const tok_method_record& rec
)
/*!
    ensures
        - returns a method_record with the same comment as rec, a name obtained from
          get_function_name(rec.declaration), and a declaration that is the text of
          every token in rec.declaration, each followed by a single space.
!*/
{
    method_record temp;

    temp.comment = rec.comment;
    temp.name = get_function_name(rec.declaration);
    for (unsigned long i = 0; i < rec.declaration.size(); ++i)
    {
        temp.declaration += rec.declaration[i].second + " ";
    }
    return temp;
}

// ----------------------------------------------------------------------------------------

class_record convert_tok_class_record (
    const tok_class_record& rec
)
/*!
    ensures
        - returns a class_record with the same scope, file and comment as rec.
        - the name is the text of the token that follows the first class or struct
          keyword in rec.declaration (empty if there isn't one).  Note that rec.name
          is not used.
        - the declaration is the text of every token in rec.declaration, each followed
          by a single space.
        - all the typedefs, variables, methods and subclasses of rec are converted as
          well (subclasses recursively).
!*/
{
    class_record crec;

    crec.scope = rec.scope;
    crec.file = rec.file;
    crec.comment = rec.comment;

    crec.name.clear();

    // find the first class token
    for (unsigned long i = 0; i+1 < rec.declaration.size(); ++i)
    {
        if (rec.declaration[i].first == tok_type::KEYWORD &&
            (rec.declaration[i].second == "class" ||
             rec.declaration[i].second == "struct" )
            )
        {
            crec.name = rec.declaration[i+1].second;
            break;
        }
    }

    crec.declaration.clear();

    for (unsigned long i = 0; i < rec.declaration.size(); ++i)
    {
        crec.declaration += rec.declaration[i].second + " ";
    }

    for (unsigned long i = 0; i < rec.public_typedefs.size(); ++i)
        crec.public_typedefs.push_back(convert_tok_typedef_record(rec.public_typedefs[i]));

    for (unsigned long i = 0; i < rec.public_variables.size(); ++i)
        crec.public_variables.push_back(convert_tok_variable_record(rec.public_variables[i]));

    for (unsigned long i = 0; i < rec.public_methods.size(); ++i)
        crec.public_methods.push_back(convert_tok_method_record(rec.public_methods[i]));

    for (unsigned long i = 0; i < rec.public_subclasses.size(); ++i)
        crec.public_subclasses.push_back(convert_tok_class_record(rec.public_subclasses[i]));

    return crec;
}

// ----------------------------------------------------------------------------------------

function_record convert_tok_function_record (
    const tok_function_record& rec
)
/*!
    ensures
        - returns a function_record with the same scope, file and comment as rec, a
          name obtained from get_function_name(rec.declaration), and a declaration
          that is the text of every token in rec.declaration, each followed by a
          single space.
!*/
{
    function_record temp;

    temp.scope = rec.scope;
    temp.file = rec.file;
    temp.comment = rec.comment;
    temp.name = get_function_name(rec.declaration);
    for (unsigned long i = 0; i < rec.declaration.size(); ++i)
    {
        temp.declaration += rec.declaration[i].second + " ";
    }

    return temp;
}

// ----------------------------------------------------------------------------------------

void convert_to_normal_records (
    const std::vector<tok_function_record>& tok_functions,
    const std::vector<tok_class_record>& tok_classes,
    std::vector<function_record>& functions,
    std::vector<class_record>& classes
)
/*!
    ensures
        - functions and classes are emptied and then filled with the converted
          versions of tok_functions and tok_classes, in the same order.
!*/
{
    functions.clear();
    classes.clear();

    for (unsigned long i = 0; i < tok_functions.size(); ++i)
    {
        functions.push_back(convert_tok_function_record(tok_functions[i]));
    }

    for (unsigned long i = 0; i < tok_classes.size(); ++i)
    {
        classes.push_back(convert_tok_class_record(tok_classes[i]));
    }
}

// ----------------------------------------------------------------------------------------

static string xml_escape (
    const string& str
)
/*!
    ensures
        - returns a copy of str where & < and > have been replaced with &amp; &lt;
          and &gt; so that the text can be written as the content of an XML element.
          (Declarations routinely contain these characters, e.g. templates,
          references and comparison operators.)
!*/
{
    string out;
    out.reserve(str.size());
    for (string::size_type i = 0; i < str.size(); ++i)
    {
        switch (str[i])
        {
            case '&': out += "&amp;"; break;
            case '<': out += "&lt;";  break;
            case '>': out += "&gt;";  break;
            default:  out += str[i];  break;
        }
    }
    return out;
}

// ----------------------------------------------------------------------------------------

// NOTE(review): the element names and the indentation inside the string literals
// of the three functions below were reconstructed from the names of the record
// fields being written.  Confirm them against whatever reads output.xml.

void write_as_xml (
    const function_record& rec,
    ostream& fout
)
/*!
    ensures
        - writes rec to fout as a single function element.  All text taken from rec
          is passed through xml_escape() first.
!*/
{
    fout << "    <function>\n";
    fout << "      <name>"        << xml_escape(rec.name)        << "</name>\n";
    fout << "      <scope>"       << xml_escape(rec.scope)       << "</scope>\n";
    fout << "      <declaration>" << xml_escape(rec.declaration) << "</declaration>\n";
    fout << "      <file>"        << xml_escape(rec.file)        << "</file>\n";
    fout << "      <comment>"     << xml_escape(rec.comment)     << "</comment>\n";
    fout << "    </function>\n";
}

// ----------------------------------------------------------------------------------------

void write_as_xml (
    const class_record& rec,
    ostream& fout
)
/*!
    ensures
        - writes rec to fout as a single class element that contains the public
          typedefs, variables, methods and subclasses of rec.  Subclasses are written
          by calling this function recursively.  All text taken from rec is passed
          through xml_escape() first.
!*/
{
    fout << "    <class>\n";
    fout << "      <name>"        << xml_escape(rec.name)        << "</name>\n";
    fout << "      <scope>"       << xml_escape(rec.scope)       << "</scope>\n";
    fout << "      <declaration>" << xml_escape(rec.declaration) << "</declaration>\n";
    fout << "      <file>"        << xml_escape(rec.file)        << "</file>\n";
    fout << "      <comment>"     << xml_escape(rec.comment)     << "</comment>\n";

    fout << "      <public_typedefs>\n";
    for (unsigned long i = 0; i < rec.public_typedefs.size(); ++i)
    {
        fout << "        <typedef>" << xml_escape(rec.public_typedefs[i].declaration) << "</typedef>\n";
    }
    fout << "      </public_typedefs>\n";

    fout << "      <public_variables>\n";
    for (unsigned long i = 0; i < rec.public_variables.size(); ++i)
    {
        fout << "        <variable>" << xml_escape(rec.public_variables[i].declaration) << "</variable>\n";
    }
    fout << "      </public_variables>\n";

    fout << "      <public_methods>\n";
    for (unsigned long i = 0; i < rec.public_methods.size(); ++i)
    {
        fout << "        <method>\n";
        fout << "          <name>"        << xml_escape(rec.public_methods[i].name)        << "</name>\n";
        fout << "          <declaration>" << xml_escape(rec.public_methods[i].declaration) << "</declaration>\n";
        fout << "          <comment>"     << xml_escape(rec.public_methods[i].comment)     << "</comment>\n";
        fout << "        </method>\n";
    }
    fout << "      </public_methods>\n";

    fout << "      <public_subclasses>\n";
    for (unsigned long i = 0; i < rec.public_subclasses.size(); ++i)
    {
        write_as_xml(rec.public_subclasses[i], fout);
    }
    fout << "      </public_subclasses>\n";

    fout << "    </class>\n";
}

// ----------------------------------------------------------------------------------------

void save_to_xml_file (
    const std::vector<function_record>& functions,
    const std::vector<class_record>& classes
)
/*!
    ensures
        - writes all the given classes followed by all the given functions into the
          file output.xml in the current working directory.
!*/
{
    ofstream fout("output.xml");

    fout << "<code>" << endl;

    fout << "  <classes>" << endl;
    for (unsigned long i = 0; i < classes.size(); ++i)
    {
        write_as_xml(classes[i], fout);
        fout << "\n";
    }
    fout << "  </classes>\n\n" << endl;

    fout << "  <global_functions>" << endl;
    for (unsigned long i = 0; i < functions.size(); ++i)
    {
        write_as_xml(functions[i], fout);
        fout << "\n";
    }
    fout << "  </global_functions>" << endl;

    fout << "</code>" << endl;
}

// ----------------------------------------------------------------------------------------

void generate_xml_markup(
    const cmd_line_parser<char>::check_1a_c& parser,
    const std::string& filter,
    const unsigned long search_depth
)
/*!
    ensures
        - collects the files selected by parser, filter and search_depth (see
          obtain_list_of_files()), runs process_file() on each of them, converts the
          results with convert_to_normal_records() and saves them with
          save_to_xml_file().
        - prints some counts to cout along the way.
!*/
{
    // first figure out which files should be processed
    std::vector<file> files;
    obtain_list_of_files(parser, filter, search_depth, files);
    cout << "files.size(): " << files.size() << endl;

    std::vector<tok_function_record> tok_functions;
    std::vector<tok_class_record> tok_classes;

    for (unsigned long i = 0; i < files.size(); ++i)
    {
        process_file(files[i].full_name(), tok_functions, tok_classes);
    }

    cout << "\ntok_functions.size(): " << tok_functions.size() << endl;
    cout << "tok_classes.size(): " << tok_classes.size() << endl;

    // tok_classes is empty when none of the files contain a documented class, so
    // the first element can only be inspected if it actually exists.
    if (tok_classes.size() > 0)
    {
        cout << "tok_classes[0].public_methods.size(): " << tok_classes[0].public_methods.size() << endl;
        cout << "tok_classes[0].public_typedefs.size(): " << tok_classes[0].public_typedefs.size() << endl;
        cout << "tok_classes[0].public_variables.size(): " << tok_classes[0].public_variables.size() << endl;
        cout << "tok_classes[0].public_subclasses.size(): " << tok_classes[0].public_subclasses.size() << endl;
    }
    cout << endl;

    //cout << tok_functions[0].comment << endl;

    std::vector<function_record> functions;
    std::vector<class_record> classes;

    convert_to_normal_records(tok_functions, tok_classes, functions, classes);

    save_to_xml_file(functions, classes);
}

// ----------------------------------------------------------------------------------------