Untitled
unknown
plain_text
3 years ago
12 kB
6
Indexable
/* * usage: ./dependencyDiscoverer [-Idir] ... file.c|file.l|file.y ... * * processes the c/yacc/lex source file arguments, outputting the dependencies * between the corresponding .o file, the .c source file, and any included * .h files * * each .h file is also processed to yield a dependency between it and any * included .h files * * these dependencies are written to standard output in a form compatible with * make; for example, assume that foo.c includes inc1.h, and inc1.h includes * inc2.h and inc3.h; this results in * * foo.o: foo.c inc1.h inc2.h inc3.h * * note that system includes (i.e. those in angle brackets) are NOT processed * * dependencyDiscoverer uses the CPATH environment variable, which can contain a * set of directories separated by ':' to find included files * if any additional directories are specified in the command line, * these are prepFound to those in CPATH, left to right * * for example, if CPATH is "/home/user/include:/usr/local/group/include", * and if "-Ifoo/bar/include" is specified on the command line, then when * processing * #include "x.h" * x.h will be located by searching for the following files in this order * * ./x.h * foo/bar/include/x.h * /home/user/include/x.h * /usr/local/group/include/x.h */ /* * general design of main() * ======================== * There are three globally accessible variables: * - dirs: a vector storing the directories to search for headers * - theTable: a hash table mapping file names to a list of dependent file names * - workQ: a list of file names that have to be processed * * 1. look up CPATH in environment * 2. assemble dirs vector from ".", any -Idir flags, and fields in CPATH * (if it is defined) * 3. for each file argument (after -Idir flags) * a. insert mapping from file.o to file.ext (where ext is c, y, or l) into * table * b. insert mapping from file.ext to empty list into table * c. append file.ext on workQ * 4. for each file on the workQ * a. lookup list of dependencies * b. invoke process(name, list_of_dependencies) * 5. for each file argument (after -Idir flags) * a. create a hash table in which to track file names already printed * b. create a linked list to track dependencies yet to print * c. print "foo.o:", insert "foo.o" into hash table * and append "foo.o" to linked list * d. invoke printDependencies() * * general design for process() * ============================ * * 1. open the file * 2. for each line of the file * a. skip leading whitespace * b. if match "#include" * i. skip leading whitespace * ii. if next character is '"' * * collect remaining characters of file name (up to '"') * * append file name to dependency list for this open file * * if file name not already in the master Table * - insert mapping from file name to empty list in master table * - append file name to workQ * 3. close file * * general design for printDependencies() * ====================================== * * 1. while there is still a file in the toProcess linked list * 2. fetch next file from toProcess * 3. lookup up the file in the master table, yielding the linked list of dependencies * 4. iterate over dependenceies * a. if the filename is already in the printed hash table, continue * b. print the filename * c. insert into printed * d. append to toProcess * * Additional helper functions * =========================== * * dirName() - appends trailing '/' if needed * parseFile() - breaks up filename into root and extension * openFile() - attempts to open a filename using the search path defined by the dirs vector. * * Farzwan Mohamed * 2553017M * SP CW2 *This is my own work as defined in the Academic Ethics agreement I have signed. */ #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <string> #include <vector> #include <unordered_map> #include <unordered_set> #include <list> #include <mutex> #include <thread> struct Queue { private: std::list <std::string> lst; std::mutex mut1; public: void push_back(std::string st) { std::unique_lock <std::mutex> lock(mut1); lst.push_back(st); } bool empty() { return lst.empty(); } std::string getFront() { std::unique_lock <std::mutex> lock(mut1); std::string st; if (lst.empty()) st = ""; else { st = lst.front(); lst.pop_front(); } return st; } }; struct HashTable { private: std::unordered_map <std::string, std::list<std::string>> ht; std::mutex mut2; public: bool Found(std::string st) { std::unique_lock<std::mutex> lock(mut2); return ht.find(st)== ht.end(); } std::list <std::string> &operator[](std::string st) { std::unique_lock <std::mutex> lock(mut2); return ht[st]; } void insert(std::pair <std::string, std::list<std::string>> p) { std::unique_lock <std::mutex> lock(mut2); ht.insert(p); } }; std::vector <std::string> dirs; HashTable theTable; Queue workQ; std::string dirName(const char *c_str) { std::string s = c_str; // s takes ownership of the string content by allocating memory for it if (s.back() != '/') { s += '/'; } return s; } std::pair <std::string, std::string> parseFile(const char *c_file) { std::string file = c_file; std::string::size_type pos = file.rfind('.'); if (pos == std::string::npos) { return {file, ""}; } else { return {file.substr(0, pos), file.substr(pos + 1)}; } } // open file using the directory search path constructed in main() static FILE *openFile(const char *file) { FILE *fd; for (unsigned int i = 0; i < dirs.size(); i++) { std::string path = dirs[i] + file; fd = fopen(path.c_str(), "r"); if (fd != NULL) return fd; // return the first file that successfully opens } return NULL; } // process file, looking for #include "foo.h" lines static void process(const char *file, std::list <std::string> *ll) { char buf[4096], name[4096]; // 1. open the file FILE *fd = openFile(file); if (fd == NULL) { fprintf(stderr, "Error opening %s\n", file); exit(-1); } while (fgets(buf, sizeof(buf), fd) != NULL) { char *p = buf; // 2a. skip leading whitespace while (isspace((int) *p)) { p++; } // 2b. if match #include if (strncmp(p, "#include", 8) != 0) { continue; } p += 8; // point to first character past #include // 2bi. skip leading whitespace while (isspace((int) *p)) { p++; } if (*p != '"') { continue; } // 2bii. next character is a " p++; // skip " // 2bii. collect remaining characters of file name char *q = name; while (*p != '\0') { if (*p == '"') { break; } *q++ = *p++; } *q = '\0'; // 2bii. append file name to dependency list ll->push_back({name}); // 2bii. if file name not already in table ... if (!theTable.Found(name)) { continue; } // ... insert mapping from file name to empty list in table ... theTable.insert({name, {}}); // ... append file name to workQ workQ.push_back(name); } // 3. close file fclose(fd); } // iteratively print dependencies static void printDependencies(std::unordered_set <std::string> *printed, std::list <std::string> *toProcess, FILE *fd) { if (!printed || !toProcess || !fd) return; // 1. while there is still a file in the toProcess list while (toProcess->size() > 0) { // 2. fetch next file to process std::string name = toProcess->front(); toProcess->pop_front(); // 3. lookup file in the table, yielding list of dependencies std::list <std::string> *ll = &theTable[name]; // 4. iterate over dependencies for (auto iter = ll->begin(); iter != ll->end(); iter++) { // 4a. if filename is already in the printed table, continue if (printed->find(*iter) != printed->end()) { continue; } // 4b. print filename fprintf(fd, " %s", iter->c_str()); // 4c. insert into printed printed->insert(*iter); // 4d. append to toProcess toProcess->push_back(*iter); } } } void workerThread() { while (!workQ.empty()) { std::string fname = workQ.getFront(); if (fname.empty()) break; if (theTable.Found(fname)) { fprintf(stderr, "Hashmap and queue discrepant\n"); exit(-1); } process(fname.c_str(), &theTable[fname]); } } int main(int argc, char *argv[]) { // 1. look up CPATH in environment char *cpath = getenv("CPATH"); // determine the number of -Idir arguments int i; for (i = 1; i < argc; i++) { if (strncmp(argv[i], "-I", 2) != 0) break; } int start = i; // 2. start assembling dirs vector dirs.push_back(dirName("./")); // always search current directory first for (i = 1; i < start; i++) { dirs.push_back(dirName(argv[i] + 2 /* skip -I */)); } if (cpath != NULL) { std::string str(cpath); std::string::size_type last = 0; std::string::size_type next = 0; while ((next = str.find(":", last)) != std::string::npos) { dirs.push_back(str.substr(last, next - last)); last = next + 1; } dirs.push_back(str.substr(last)); } // 2. finished assembling dirs vector // 3. for each file argument ... for (i = start; i < argc; i++) { std::pair <std::string, std::string> pair = parseFile(argv[i]); if (pair.second != "c" && pair.second != "y" && pair.second != "l") { fprintf(stderr, "Illegal extension: %s - must be .c, .y or .l\n", pair.second.c_str()); return -1; } std::string obj = pair.first + ".o"; // 3a. insert mapping from file.o to file.ext theTable.insert({obj, {argv[i]}}); // 3b. insert mapping from file.ext to empty list theTable.insert({argv[i], {}}); // 3c. append file.ext on workQ workQ.push_back(argv[i]); } char* crawler_threads = getenv("CRAWLER_THREADS"); int crawl = 2; if (crawler_threads != NULL) { crawl = atoi(crawler_threads); } std::vector <std::thread> threads; for (int i = 0; i < crawl; i++) { threads.emplace_back([](){workerThread(); }); } for (int i = 0; i < crawl; i++) { threads[i].join(); } // 5. for each file argument for (i = start; i < argc; i++) { // 5a. create hash table in which to track file names already printed std::unordered_set <std::string> printed; // 5b. create list to track dependencies yet to print std::list <std::string> toProcess; std::pair <std::string, std::string> pair = parseFile(argv[i]); std::string obj = pair.first + ".o"; // 5c. print "foo.o:" ... printf("%s:", obj.c_str()); // 5c. ... insert "foo.o" into hash table and append to list printed.insert(obj); toProcess.push_back(obj); // 5d. invoke printDependencies(&printed, &toProcess, stdout); printf("\n"); } return 0; }
Editor is loading...