Untitled
unknown
plain_text
3 years ago
12 kB
9
Indexable
/*
* usage: ./dependencyDiscoverer [-Idir] ... file.c|file.l|file.y ...
*
* processes the c/yacc/lex source file arguments, outputting the dependencies
* between the corresponding .o file, the .c source file, and any included
* .h files
*
* each .h file is also processed to yield a dependency between it and any
* included .h files
*
* these dependencies are written to standard output in a form compatible with
* make; for example, assume that foo.c includes inc1.h, and inc1.h includes
* inc2.h and inc3.h; this results in
*
* foo.o: foo.c inc1.h inc2.h inc3.h
*
* note that system includes (i.e. those in angle brackets) are NOT processed
*
* dependencyDiscoverer uses the CPATH environment variable, which can contain a
* set of directories separated by ':' to find included files
* if any additional directories are specified in the command line,
* these are prepFound to those in CPATH, left to right
*
* for example, if CPATH is "/home/user/include:/usr/local/group/include",
* and if "-Ifoo/bar/include" is specified on the command line, then when
* processing
* #include "x.h"
* x.h will be located by searching for the following files in this order
*
* ./x.h
* foo/bar/include/x.h
* /home/user/include/x.h
* /usr/local/group/include/x.h
*/
/*
* general design of main()
* ========================
* There are three globally accessible variables:
* - dirs: a vector storing the directories to search for headers
* - theTable: a hash table mapping file names to a list of dependent file names
* - workQ: a list of file names that have to be processed
*
* 1. look up CPATH in environment
* 2. assemble dirs vector from ".", any -Idir flags, and fields in CPATH
* (if it is defined)
* 3. for each file argument (after -Idir flags)
* a. insert mapping from file.o to file.ext (where ext is c, y, or l) into
* table
* b. insert mapping from file.ext to empty list into table
* c. append file.ext on workQ
* 4. for each file on the workQ
* a. lookup list of dependencies
* b. invoke process(name, list_of_dependencies)
* 5. for each file argument (after -Idir flags)
* a. create a hash table in which to track file names already printed
* b. create a linked list to track dependencies yet to print
* c. print "foo.o:", insert "foo.o" into hash table
* and append "foo.o" to linked list
* d. invoke printDependencies()
*
* general design for process()
* ============================
*
* 1. open the file
* 2. for each line of the file
* a. skip leading whitespace
* b. if match "#include"
* i. skip leading whitespace
* ii. if next character is '"'
* * collect remaining characters of file name (up to '"')
* * append file name to dependency list for this open file
* * if file name not already in the master Table
* - insert mapping from file name to empty list in master table
* - append file name to workQ
* 3. close file
*
* general design for printDependencies()
* ======================================
*
* 1. while there is still a file in the toProcess linked list
* 2. fetch next file from toProcess
* 3. lookup up the file in the master table, yielding the linked list of dependencies
* 4. iterate over dependenceies
* a. if the filename is already in the printed hash table, continue
* b. print the filename
* c. insert into printed
* d. append to toProcess
*
* Additional helper functions
* ===========================
*
* dirName() - appends trailing '/' if needed
* parseFile() - breaks up filename into root and extension
* openFile() - attempts to open a filename using the search path defined by the dirs vector.
*
* Farzwan Mohamed
* 2553017M
* SP CW2
*This is my own work as defined in the Academic Ethics agreement I have signed.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <list>
#include <mutex>
#include <thread>
struct Queue {
private:
std::list <std::string> lst;
std::mutex mut1;
public:
void push_back(std::string st) {
std::unique_lock <std::mutex> lock(mut1);
lst.push_back(st);
}
bool empty() {
return lst.empty();
}
std::string getFront() {
std::unique_lock <std::mutex> lock(mut1);
std::string st;
if (lst.empty())
st = "";
else {
st = lst.front();
lst.pop_front();
}
return st;
}
};
struct HashTable {
private:
std::unordered_map <std::string, std::list<std::string>> ht;
std::mutex mut2;
public:
bool Found(std::string st) {
std::unique_lock<std::mutex> lock(mut2);
return ht.find(st)== ht.end();
}
std::list <std::string> &operator[](std::string st) {
std::unique_lock <std::mutex> lock(mut2);
return ht[st];
}
void insert(std::pair <std::string, std::list<std::string>> p) {
std::unique_lock <std::mutex> lock(mut2);
ht.insert(p);
}
};
std::vector <std::string> dirs;
HashTable theTable;
Queue workQ;
std::string dirName(const char *c_str) {
std::string s = c_str; // s takes ownership of the string content by allocating memory for it
if (s.back() != '/') { s += '/'; }
return s;
}
std::pair <std::string, std::string> parseFile(const char *c_file) {
std::string file = c_file;
std::string::size_type pos = file.rfind('.');
if (pos == std::string::npos) {
return {file, ""};
} else {
return {file.substr(0, pos), file.substr(pos + 1)};
}
}
// open file using the directory search path constructed in main()
static FILE *openFile(const char *file) {
FILE *fd;
for (unsigned int i = 0; i < dirs.size(); i++) {
std::string path = dirs[i] + file;
fd = fopen(path.c_str(), "r");
if (fd != NULL)
return fd; // return the first file that successfully opens
}
return NULL;
}
// process file, looking for #include "foo.h" lines
static void process(const char *file, std::list <std::string> *ll) {
char buf[4096], name[4096];
// 1. open the file
FILE *fd = openFile(file);
if (fd == NULL) {
fprintf(stderr, "Error opening %s\n", file);
exit(-1);
}
while (fgets(buf, sizeof(buf), fd) != NULL) {
char *p = buf;
// 2a. skip leading whitespace
while (isspace((int) *p)) { p++; }
// 2b. if match #include
if (strncmp(p, "#include", 8) != 0) { continue; }
p += 8; // point to first character past #include
// 2bi. skip leading whitespace
while (isspace((int) *p)) { p++; }
if (*p != '"') { continue; }
// 2bii. next character is a "
p++; // skip "
// 2bii. collect remaining characters of file name
char *q = name;
while (*p != '\0') {
if (*p == '"') { break; }
*q++ = *p++;
}
*q = '\0';
// 2bii. append file name to dependency list
ll->push_back({name});
// 2bii. if file name not already in table ...
if (!theTable.Found(name)) { continue; }
// ... insert mapping from file name to empty list in table ...
theTable.insert({name, {}});
// ... append file name to workQ
workQ.push_back(name);
}
// 3. close file
fclose(fd);
}
// iteratively print dependencies
static void printDependencies(std::unordered_set <std::string> *printed,
std::list <std::string> *toProcess,
FILE *fd) {
if (!printed || !toProcess || !fd) return;
// 1. while there is still a file in the toProcess list
while (toProcess->size() > 0) {
// 2. fetch next file to process
std::string name = toProcess->front();
toProcess->pop_front();
// 3. lookup file in the table, yielding list of dependencies
std::list <std::string> *ll = &theTable[name];
// 4. iterate over dependencies
for (auto iter = ll->begin(); iter != ll->end(); iter++) {
// 4a. if filename is already in the printed table, continue
if (printed->find(*iter) != printed->end()) { continue; }
// 4b. print filename
fprintf(fd, " %s", iter->c_str());
// 4c. insert into printed
printed->insert(*iter);
// 4d. append to toProcess
toProcess->push_back(*iter);
}
}
}
void workerThread() {
while (!workQ.empty()) {
std::string fname = workQ.getFront();
if (fname.empty())
break;
if (theTable.Found(fname)) {
fprintf(stderr, "Hashmap and queue discrepant\n");
exit(-1);
}
process(fname.c_str(), &theTable[fname]);
}
}
int main(int argc, char *argv[]) {
// 1. look up CPATH in environment
char *cpath = getenv("CPATH");
// determine the number of -Idir arguments
int i;
for (i = 1; i < argc; i++) {
if (strncmp(argv[i], "-I", 2) != 0)
break;
}
int start = i;
// 2. start assembling dirs vector
dirs.push_back(dirName("./")); // always search current directory first
for (i = 1; i < start; i++) {
dirs.push_back(dirName(argv[i] + 2 /* skip -I */));
}
if (cpath != NULL) {
std::string str(cpath);
std::string::size_type last = 0;
std::string::size_type next = 0;
while ((next = str.find(":", last)) != std::string::npos) {
dirs.push_back(str.substr(last, next - last));
last = next + 1;
}
dirs.push_back(str.substr(last));
}
// 2. finished assembling dirs vector
// 3. for each file argument ...
for (i = start; i < argc; i++) {
std::pair <std::string, std::string> pair = parseFile(argv[i]);
if (pair.second != "c" && pair.second != "y" && pair.second != "l") {
fprintf(stderr, "Illegal extension: %s - must be .c, .y or .l\n",
pair.second.c_str());
return -1;
}
std::string obj = pair.first + ".o";
// 3a. insert mapping from file.o to file.ext
theTable.insert({obj, {argv[i]}});
// 3b. insert mapping from file.ext to empty list
theTable.insert({argv[i], {}});
// 3c. append file.ext on workQ
workQ.push_back(argv[i]);
}
char* crawler_threads = getenv("CRAWLER_THREADS");
int crawl = 2;
if (crawler_threads != NULL) {
crawl = atoi(crawler_threads);
}
std::vector <std::thread> threads;
for (int i = 0; i < crawl; i++) {
threads.emplace_back([](){workerThread(); });
}
for (int i = 0; i < crawl; i++) {
threads[i].join();
}
// 5. for each file argument
for (i = start; i < argc; i++) {
// 5a. create hash table in which to track file names already printed
std::unordered_set <std::string> printed;
// 5b. create list to track dependencies yet to print
std::list <std::string> toProcess;
std::pair <std::string, std::string> pair = parseFile(argv[i]);
std::string obj = pair.first + ".o";
// 5c. print "foo.o:" ...
printf("%s:", obj.c_str());
// 5c. ... insert "foo.o" into hash table and append to list
printed.insert(obj);
toProcess.push_back(obj);
// 5d. invoke
printDependencies(&printed, &toProcess, stdout);
printf("\n");
}
return 0;
}
Editor is loading...