Untitled
#include "proj1.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// ~3 hrs 45 mins to complete String Library
// ~1 hr 45 min to introduce skeleton and initial state
// PHASE I: COMPLETE

/* States of the expansion state machine driven by macro_process_text(). */
enum state {
    PLAINTEXT,
    ESCAPE_CHAR,
    ARG1, ARG2, ARG3, // macro arguments
    UNDEF,
    PROCESSED,
    ERROR
};

/* Which branch of an \if / \ifdef was selected by its condition. */
enum arg {
    THEN, // for if/ifdef statements
    ELSE,
    NONE  // default state
};

/* Growable, NUL-terminated character buffer used by the string library. */
typedef struct {
    char *data;
    size_t size;     // number of characters, excluding the terminator
    size_t capacity; // number of bytes allocated for data
} string_t;

/* A user-defined macro: its name, its replacement text and a '#' flag. */
typedef struct {
    string_t *name;
    string_t *value;
    bool has_pound; // for arg substitution
} macro;

/* Growable array of owned macro pointers. */
typedef struct {
    macro **data; // arr list of macros
    size_t size;
    size_t capacity;
} array_list;

/* All buffers and flags shared between the state-machine handlers. */
typedef struct {
    string_t *comment_processed_buffer; // comment-free input, used as the stack
    string_t *plaintext_buffer;         // final output
    string_t *macro_buffer;             // name of the macro being processed
    string_t *arg1_buffer;              // reuse for expandafter
    string_t *arg2_buffer;
    string_t *arg3_buffer;
    string_t *expaf_before_buffer; // arg buffers will be utilized during recursion,
    string_t *expaf_after_buffer;  // so store before/after separately
    array_list *macro_ds;
    macro *currentMacro;
    bool skip_arg; // for the \def macro => skip arg if there is no pound
    enum arg ARG;
} parser_context;

//---------------------------------------------------------
// data structures and prototypes
//---------------------------------------------------------

/**
 * Creates a typed string, populating data, size and capacity fields
 *
 * @param str input chars to turn into string
 * @return new string, or NULL if str is NULL or allocation fails
 */
string_t *string_create_from_string(const char *str);

/**
 * Creates an output buffer, populating size and capacity for future
 * reallocs.
 *
 * @return new empty buffer, or NULL if allocation fails
 */
string_t *string_buffer_create(void);

/**
 * Destroys a string_t object
 *
 * @param str string to free (NULL is accepted)
 */
void string_destroy(string_t *str);

/**
 * Concatenates a character to the end of a given string
 *
 * @param str pointer to a string object
 * @param c character to add
 */
void string_append_char(string_t *str, const char c);

/**
 * Concatenates a given str (added_str) to the first string
 * given in the args.
 *
 * @param base_str ptr to base string
 * @param added_str ptr to string to be added
 */
void string_append_string(string_t *base_str, const string_t *added_str);

void string_grow(string_t *str);
void string_pop_char(string_t *str, const char c);
void string_reverse(string_t *str);

/**
 * Called when exiting a macro expansion. 'Resets' the buffer by
 * setting its size to 0, effectively appending over the previous
 * data. The null terminator written at index 0 lets us keep using
 * strcmp() on the macro names.
 */
void string_clear(string_t *macro_buffer);

void string_remove_newline(string_t *str);

/**
 * Pushes the reversed contents of `src` onto the end of `dest`.
 * i.e. if src = "Hello", we append "olleH" to `dest`.
 */
void string_push_reversed(string_t *dest, const string_t *src);

/**
 * State machine
 */
enum state macro_process_text(char curr_char, enum state curr_state, parser_context *ctx);

//---------------------------------------------------------
// macro data structure
//---------------------------------------------------------

/**
 * Allocates a macro with empty name and value buffers and has_pound
 * cleared. Calls DIE() if any allocation fails.
 *
 * @return the new macro; the caller owns it
 */
macro *macro_create(void) {
    macro *this_macro = malloc(sizeof *this_macro);
    if (!this_macro) {
        DIE("Failed malloc. %s", "END");
    }

    // allocating fields
    this_macro->name = string_buffer_create();
    this_macro->value = string_buffer_create();
    this_macro->has_pound = false;

    if (!this_macro->name || !this_macro->value) {
        DIE("Failed to create string buffers for macro fields. %s", "END");
    }
    return this_macro;
}

//---------------------------------------------------------
// array list prototypes
//---------------------------------------------------------
array_list *list_create(void);
void list_destroy(array_list *list);
void list_append(array_list *list, macro *new_macro);
void list_grow(array_list *list);
void list_remove(array_list *list, macro *new_macro);
macro *list_retrieve(array_list *list, const char *macro_name);

/* States of the comment-stripping pass in process_comments(). */
enum comment_state {
    READING,
    COMMENT,
    BLANK_SKIP
};

void process_comments(FILE *infile, string_t *buffer);

//---------------------------------------------------------
// context prototypes
//---------------------------------------------------------

/**
 * Allocates a parser context with all buffers empty, an empty macro
 * list, no current macro, skip_arg off and ARG set to NONE.
 * Calls DIE() if any allocation fails.
 */
parser_context *context_create(void) {
    parser_context *context = malloc(sizeof *context);
    if (!context) {
        DIE("Failed Malloc. %s", "ERROR");
    }

    // init buffers and list
    context->comment_processed_buffer = string_buffer_create();
    context->plaintext_buffer = string_buffer_create();
    context->macro_buffer = string_buffer_create();
    context->arg1_buffer = string_buffer_create();
    context->arg2_buffer = string_buffer_create();
    context->arg3_buffer = string_buffer_create();
    context->expaf_before_buffer = string_buffer_create();
    context->expaf_after_buffer = string_buffer_create();
    context->macro_ds = list_create();
    // macro_process_text() reads this field on every call, so it must
    // never be left indeterminate.
    context->currentMacro = NULL;
    context->skip_arg = false;
    context->ARG = NONE;

    // error check
    if (!context->comment_processed_buffer || !context->plaintext_buffer ||
        !context->macro_buffer || !context->arg1_buffer ||
        !context->arg2_buffer || !context->arg3_buffer ||
        !context->expaf_before_buffer || !context->expaf_after_buffer ||
        !context->macro_ds) {
        DIE("Failed to initialize buffers or list. %s", "ERROR");
    }
    return context;
}

/**
 * Frees a context together with every buffer and the macro list it owns.
 * A NULL context is ignored.
 */
void context_destroy(parser_context *context) {
    if (!context) {
        return;
    }

    // string_destroy() accepts NULL, so no per-buffer guard is needed
    string_destroy(context->comment_processed_buffer);
    string_destroy(context->plaintext_buffer);
    string_destroy(context->macro_buffer);
    string_destroy(context->arg1_buffer);
    string_destroy(context->arg2_buffer);
    string_destroy(context->arg3_buffer);
    string_destroy(context->expaf_before_buffer);
    string_destroy(context->expaf_after_buffer);

    // list_destroy() treats NULL as fatal, so keep the guard here
    if (context->macro_ds) {
        list_destroy(context->macro_ds);
    }
    free(context);
}

bool special_char_escaped(char curr_char, bool reset);
int check_brace_balance(char curr_char, bool reset, bool add_one_openbrace);
enum state DEF_NAME(array_list *macro_ds, char curr_char, parser_context *ctx);
enum state DEF_VALUE(char curr_char, enum state curr_state, array_list *macro_ds, string_t *macro_buffer);
enum state UNDEF_VALUE(char curr_char, parser_context *ctx);
void macro_substitute_arg(const macro *m, const string_t *arg, parser_context *ctx);
enum state USERDEF_VALUE(char curr_char, const macro *m, parser_context *ctx);
// EX: /jake{} => no pound so ignore everything after the '{' up until '}'
enum state USERDEF_IGNORE(char curr_char, parser_context *ctx);
enum state IF_THEN(char curr_char, string_t *arg2_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx);
enum state IF_ELSE(char curr_char, string_t *arg3_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx);
enum state IF_COND(char curr_char, parser_context *ctx);
enum state IFDEF_COND(char curr_char, parser_context *ctx);
enum state IFDEF_THEN(char curr_char, parser_context *ctx);
enum state IFDEF_ELSE(char curr_char, parser_context *ctx);
string_t *read_file(const char *path);
enum state INCLUDE_PATH(char curr_char, parser_context *ctx);
void expansion_stack(parser_context *ctx, string_t *source_buffer);
enum state EXPANDAFTER_BEFORE(char curr_char, parser_context *ctx);
enum state EXPANDAFTER_AFTER(char curr_char, parser_context *ctx);

//---------------------------------------------------------
// main
//---------------------------------------------------------
// PHASE II: Think of what states you'll need to have + implement basic character scanning

/**
 * Reads stdin (no arguments) or every file named on the command line,
 * strips comments, expands macros and prints the result to stdout.
 */
int main(int argc, char *argv[]) {
    parser_context *ctx = context_create();
    string_t *comment_processed_buffer = ctx->comment_processed_buffer;
    string_t *plaintext_buffer = ctx->plaintext_buffer;

    if (argc == 1) {
        // INPUT CASE #1: read from stdin
        process_comments(stdin, comment_processed_buffer);
    } else {
        // INPUT CASE #2: read from specified files (can read multiple).
        // Comments are stripped per file and the clean text is concatenated.
        for (int i = 1; i < argc; i++) {
            FILE *infile = fopen(argv[i], "r");
            if (infile == NULL) {
                DIE("%s is not a valid file input. Please try again.", argv[i]);
            }
            process_comments(infile, comment_processed_buffer);
            fclose(infile);
        }
    }

    // reverse-string stack approach: the end of the buffer is the next
    // character to read, so expansions can be pushed back cheaply
    string_reverse(comment_processed_buffer);

    // run expansions
    expansion_stack(ctx, comment_processed_buffer);

    // print
    printf("%s", plaintext_buffer->data);

    // free
    context_destroy(ctx);
    return 0;
}

//---------------------------------------------------------
// expansion logic
//---------------------------------------------------------

/**
 * Pops characters off the end of source_buffer one at a time and feeds
 * them to macro_process_text() until the buffer is empty. The size is
 * re-read after every step because a handler may have pushed expansion
 * text onto the buffer.
 */
void expansion_stack(parser_context *ctx, string_t *source_buffer) {
    enum state curr_state = PLAINTEXT;
    size_t stackTop = source_buffer->size;

    while (stackTop > 0) {
        stackTop--;
        char c = source_buffer->data[stackTop]; // point to top of stack
        source_buffer->size = stackTop;         // update buffer size
        curr_state = macro_process_text(c, curr_state, ctx); // expand macros and update stack
        stackTop = source_buffer->size; // update pointer to top of stack after expansions
    }
}

//---------------------------------------------------------
// string library
//---------------------------------------------------------

string_t *string_create_from_string(const char *string) {
    if (string == NULL) {
        return NULL;
    }

    string_t *new_str = malloc(sizeof *new_str);
    if (!new_str) {
        return NULL;
    }

    size_t str_len = strlen(string);
    new_str->size = str_len;
    new_str->capacity = str_len + 1; // include null terminator!!!
    new_str->data = malloc(new_str->capacity);
    if (!new_str->data) {
        free(new_str);
        return NULL;
    }
    memcpy(new_str->data, string, str_len + 1);
    return new_str;
}

string_t *string_buffer_create(void) {
    // large init size for efficient starting threshold
    size_t init_size = 250;

    string_t *buffer = malloc(sizeof *buffer);
    if (!buffer) {
        return NULL;
    }
    buffer->size = 0;
    buffer->capacity = init_size;
    buffer->data = malloc(buffer->capacity);
    if (!buffer->data) {
        free(buffer);
        return NULL;
    }

    // init buffer with null terminator --> empty string
    buffer->data[0] = '\0';
    return buffer;
}

void string_destroy(string_t *str) {
    if (str) {
        free(str->data); // free string
        free(str);
    }
}

/**
 * Enlarges the string's storage via the project's DOUBLE() macro, which
 * is handed both the data pointer and the capacity field.
 * NOTE(review): callers rely on DOUBLE() also updating str->capacity —
 * confirm against proj1.h.
 * Calls DIE() if the reallocation fails.
 */
void string_grow(string_t *str) {
    // keep the old pointer until the result is known; never pass a NULL
    // pointer to a %s conversion
    char *grown = DOUBLE(str->data, str->capacity);
    if (!grown) {
        DIE("Double failed in string_put_char(): %s", "ERROR");
    }
    str->data = grown;
}

void string_append_char(string_t *str, const char c) {
    size_t new_arr_len = str->size + 2; // new char + null terminator

    // grow array if capacity is less than new
    if (str->capacity < new_arr_len) {
        string_grow(str);
    }

    // add char and null terminator to string
    str->data[str->size] = c;
    str->data[str->size + 1] = '\0';
    str->size++;
}

void string_append_string(string_t *base_str, const string_t *added_str) {
    // accounting for null terminator of new str
    size_t new_arr_len = base_str->size + added_str->size + 1;

    // edge case: the added string may need more than one doubling. The
    // live capacity field is re-read each iteration; comparing against a
    // stale local copy would never let this loop finish.
    while (base_str->capacity < new_arr_len) {
        string_grow(base_str);
    }

    // concatenate
    memcpy(base_str->data + base_str->size, added_str->data, added_str->size);

    // update size and null-terminate
    base_str->size += added_str->size;
    base_str->data[base_str->size] = '\0';
}

/**
 * Removes the last character of str. The second parameter is unused.
 * Calls DIE() when str is empty.
 */
void string_pop_char(string_t *str, const char c) {
    (void)c;
    if (str->size == 0) {
        DIE("Trying to pop an empty string!: %zu", str->size);
    }

    // decrement size and use as index access point
    str->size--;
    str->data[str->size] = '\0';
}

/** Reverses the characters of str in place. */
void string_reverse(string_t *str) {
    // with fewer than two characters there is nothing to swap, and
    // size - 1 would wrap around for an empty string
    if (str->size < 2) {
        return;
    }

    size_t start = 0;
    size_t end = str->size - 1;
    while (start < end) {
        char temp = str->data[start];
        str->data[start] = str->data[end];
        str->data[end] = temp;
        start++;
        end--;
    }
}

void string_clear(string_t *str) {
    if (!str || !str->data) {
        return;
    }
    str->size = 0;
    str->data[0] = '\0';
}

/**
 * Drops one trailing '\n' from str, if present, keeping size in sync
 * with the stored data. Calls DIE() on a NULL buffer.
 */
void string_remove_newline(string_t *str) {
    if (str == NULL) {
        DIE("NULL buffer. %s", "ERROR");
    }
    if (str->size > 0 && str->data[str->size - 1] == '\n') {
        str->size--;
        str->data[str->size] = '\0'; // replace the newline with a null terminator
    }
}

void string_push_reversed(string_t *dest, const string_t *src) {
    // from size down to 1
    for (size_t i = src->size; i > 0; i--) {
        string_append_char(dest, src->data[i - 1]); // remember 0 based !
    }
}

//---------------------------------------------------------
// array list library
//---------------------------------------------------------

/** Allocates an empty macro list. Calls DIE() if allocation fails. */
array_list *list_create(void) {
    array_list *list = malloc(sizeof *list);
    if (!list) {
        DIE("Failed malloc. %s", "END");
    }

    // init. capacity of macro array list
    size_t init_capacity = 10;

    list->data = malloc(sizeof *list->data * init_capacity);
    if (!list->data) {
        free(list);
        DIE("Failed malloc. %s", "END");
    }
    list->size = 0;
    list->capacity = init_capacity;
    return list;
}

/** Frees the list and every macro stored in it. Calls DIE() on NULL. */
void list_destroy(array_list *list) {
    if (!list) {
        DIE("NULL list in list_destroy. %s", "END");
    }

    // free each macro's fields in array list
    for (size_t i = 0; i < list->size; i++) {
        macro *m = list->data[i];
        if (m) {
            string_destroy(m->name);
            string_destroy(m->value);
            free(m);
        }
    }
    free(list->data);
    free(list);
}

/**
 * Enlarges the list's storage via DOUBLE(), mirroring string_grow().
 * Calls DIE() if the reallocation fails.
 */
void list_grow(array_list *list) {
    macro **grown = DOUBLE(list->data, list->capacity);
    if (!grown) {
        DIE("Double failed in string_put_char(): %s", "ERROR");
    }
    list->data = grown;
}

/** Appends new_macro to list (the list takes ownership of it). */
void list_append(array_list *list, macro *new_macro) {
    if (!list) {
        WARN("Invalid list passed in. %s", "FAILED");
        return; // nothing to append to; do not dereference NULL
    }

    // check capacity
    if (list->size == list->capacity) {
        list_grow(list);
    }

    list->data[list->size] = new_macro;
    list->size++;
}

/**
 * Removes and frees the macro whose name equals remove_macro's name.
 * Calls DIE() if no such macro is stored.
 */
void list_remove(array_list *list, macro *remove_macro) {
    // linear search
    for (size_t i = 0; i < list->size; i++) {
        macro *found = list->data[i];
        if (strcmp(found->name->data, remove_macro->name->data) != 0) {
            continue;
        }

        // release the character buffers and the macro itself, not just
        // the string_t headers
        string_destroy(found->name);
        string_destroy(found->value);
        free(found);

        // avoid costly shifting by replacing deleted w/ last macro
        if (i != list->size - 1) {
            list->data[i] = list->data[list->size - 1];
        }
        list->size--;
        return;
    }

    // macro was not found
    DIE("MACRO NOT FOUND. %s", "ERROR");
}

/** Returns the macro named macro_name, or NULL if it is not stored. */
macro *list_retrieve(array_list *list, const char *macro_name) {
    for (size_t i = 0; i < list->size; i++) {
        macro *m = list->data[i];
        if (strcmp(m->name->data, macro_name) == 0) {
            return m;
        }
    }
    // not found
    return NULL;
}

//---------------------------------------------------------
// comment processing state machine
//---------------------------------------------------------

/**
 * Copies infile into buffer with comments removed. An unescaped '%'
 * starts a comment that runs through the next newline; after that
 * newline, whitespace characters are skipped until a non-whitespace
 * character (kept) or another '%' (new comment) is found.
 */
void process_comments(FILE *infile, string_t *buffer) {
    enum comment_state state = READING;
    int ch; // int, not char: fgetc() must be able to report EOF distinctly
    bool unescaped;

    while ((ch = fgetc(infile)) != EOF) {
        switch (state) {
        case READING:
            // escape state (true is unescaped, false is escaped)
            unescaped = special_char_escaped((char)ch, false);
            if (ch == '\n') {
                // reset escape parity
                special_char_escaped('\0', true);
            }
            // process comment if unescaped is true
            if (ch == '%' && unescaped) {
                state = COMMENT;
            } else {
                string_append_char(buffer, (char)ch);
            }
            break;

        case COMMENT:
            // still fed to the escape tracker so its counter stays current
            special_char_escaped((char)ch, false);
            if (ch == '\n') {
                // "first non-blank, non-tab char following newline"
                state = BLANK_SKIP;
                // reset and transition to next line
                special_char_escaped('\0', true);
            }
            break;

        case BLANK_SKIP:
            special_char_escaped((char)ch, false);
            if (ch == '\n') {
                // further newlines are skipped as well
            } else if (ch == '%') {
                state = COMMENT;
            } else if (!isspace(ch)) {
                // found a char, resume processing
                state = READING;
                string_append_char(buffer, (char)ch);
            }
            break;

        default:
            break;
        }
    }
}

//---------------------------------------------------------
// state machine
//---------------------------------------------------------

/**
 * Tracks brace nesting in a counter shared by all handlers.
 *
 * Note: the third parameter is a flag for initializing balance to 1
 * instead of 0. In cases where we read the opening brace in ESCAPE_CHAR
 * separately instead of the state of the caller, we add one to equalize
 * the balance counter.
 *
 * @param curr_char         character to account for
 * @param reset             when true, zero the counter instead of counting
 * @param add_one_openbrace when true and the counter is 0, start it at 1
 * @return the counter after this call
 */
int check_brace_balance(char curr_char, bool reset, bool add_one_openbrace) {
    static int balance = 0;

    if (add_one_openbrace && balance == 0) {
        balance = 1; // the front brace has already been read in a diff. state
    }

    // reset when we leave the state to prep for next states
    if (reset) {
        balance = 0;
    } else if (curr_char == '{') {
        balance++;
    } else if (curr_char == '}') {
        balance--;
    }
    return balance;
}

/**
 * Tracks runs of consecutive backslashes.
 *
 * @param curr_char character being examined
 * @param reset     when true, clear the counter and return true
 * @return for a backslash: true if the run length is now even; for any
 *         other character: true if the preceding run length was even
 *         (i.e. the character is unescaped). The counter is cleared
 *         after a non-backslash character.
 */
bool special_char_escaped(char curr_char, bool reset) {
    // persistence counter for consecutive escapes
    static int escape_count = 0;

    if (reset) {
        escape_count = 0;
        return true;
    }

    if (curr_char == '\\') {
        escape_count++;
        return (escape_count % 2 == 0);
    }

    bool isUnescaped = (escape_count % 2 == 0);
    escape_count = 0; // reset if a non-backslash character is encountered
    return isUnescaped;
}

// \DEF /////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the NAME argument of \def into ctx->arg2_buffer.
 * On '}' the name is stored in the most recently appended macro and the
 * machine moves on to the value (ARG1). Calls DIE() on a redefinition or
 * a non-alphanumeric character.
 */
enum state DEF_NAME(array_list *macro_ds, char curr_char, parser_context *ctx) {
    // subtract 1 to access recently appended macro
    macro *m = macro_ds->data[macro_ds->size - 1];
    string_t *arg2_buffer = ctx->arg2_buffer; // intermediate buffer

    if (curr_char == '}') {
        // EDGE CASE: check if the user is trying to redefine a macro before undefining
        if (list_retrieve(macro_ds, arg2_buffer->data)) {
            DIE("cannot redefine %s", arg2_buffer->data);
        }
        string_append_string(m->name, arg2_buffer);
        string_clear(arg2_buffer);
        return ARG1; // read the value next
    }

    // not alphanumeric, throw error
    if (isalnum((unsigned char)curr_char) == 0) {
        DIE("%c is not alphanumeric", curr_char);
    }
    string_append_char(arg2_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the VALUE argument of \def into the most
 * recently appended macro. The opening brace is required and not stored;
 * the matching closing brace ends the definition and returns PLAINTEXT.
 * An unescaped '#' sets has_pound; an escaped '#' is not stored.
 * The curr_state parameter is unused.
 */
enum state DEF_VALUE(char curr_char, enum state curr_state, array_list *macro_ds, string_t *macro_buffer) {
    static bool start = false; // becomes true once the opening brace is consumed
    (void)curr_state;

    // true if no escape char, false if there is
    bool unescaped = special_char_escaped(curr_char, false);
    // every character is counted exactly once; 0 means the value is closed
    int balance = check_brace_balance(curr_char, false, false);

    if (!start) {
        // if first char in DEF_VAL is not an opening brace, then semantic
        // error. Ex: space between macro args
        if (curr_char != '{') {
            DIE("expected {, found %c", curr_char);
        }
        start = true; // skip the opening brace of value arg
        return ARG1;
    }

    // check escape characters
    if (curr_char == '#' && !unescaped) {
        return ARG1; // odd parity -> escaped pound so DO NOT append
    }

    // exit condition: balanced closing brace (not stored)
    if (curr_char == '}' && balance == 0) {
        check_brace_balance(curr_char, true, false); // reset
        special_char_escaped(curr_char, true);
        string_clear(macro_buffer); // reset macro_buffer for next iteration
        start = false;
        return PLAINTEXT;
    }

    // append value
    macro *m = macro_ds->data[macro_ds->size - 1];
    if (curr_char == '#' && unescaped) {
        m->has_pound = true;
    }
    string_append_char(m->value, curr_char);
    return ARG1;
}

/**
 * Pushes the value of m onto ctx->comment_processed_buffer in reverse
 * (stack order), replacing every '#' with the reversed text of arg.
 */
void macro_substitute_arg(const macro *m, const string_t *arg, parser_context *ctx) {
    const char *val = m->value->data;

    for (size_t i = m->value->size; i > 0; i--) {
        char ch = val[i - 1];
        if (ch == '#') {
            // insert arg
            string_push_reversed(ctx->comment_processed_buffer, arg);
        } else {
            // normal char -- push as usual
            string_append_char(ctx->comment_processed_buffer, ch);
        }
    }
}

// USER DEFINED MACROS /////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the argument of a user-defined macro whose
 * value contains '#'. The opening brace was already consumed in
 * ESCAPE_CHAR, so the brace counter starts at 1; nested braces are kept
 * in the argument and only the matching '}' ends it, at which point the
 * substituted value is pushed back onto the stack.
 */
enum state USERDEF_VALUE(char curr_char, const macro *m, parser_context *ctx) {
    string_t *arg1_buffer = ctx->arg1_buffer;
    int balance = check_brace_balance(curr_char, false, true);

    if (curr_char == '}' && balance == 0) {
        // end of argument -> place back in stack
        macro_substitute_arg(m, arg1_buffer, ctx);
        string_clear(ctx->macro_buffer);
        string_clear(arg1_buffer);
        check_brace_balance(curr_char, true, false); // reset for next states
        return PLAINTEXT;
    }

    // accumulate chars into arg_buffer
    string_append_char(arg1_buffer, curr_char);
    return ARG1;
}

/**
 * Discards the argument of a user-defined macro whose value has no '#'.
 * On the matching '}' the macro's value is pushed back onto the stack.
 */
enum state USERDEF_IGNORE(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, true); // to check brace balance

    // exit condition
    if (curr_char == '}' && balance == 0) {
        ctx->currentMacro = list_retrieve(ctx->macro_ds, ctx->macro_buffer->data);
        macro *m = ctx->currentMacro;
        string_clear(ctx->macro_buffer);
        string_push_reversed(ctx->comment_processed_buffer, m->value);

        // reset!!
        check_brace_balance(curr_char, true, true);
        special_char_escaped(curr_char, true);
        ctx->skip_arg = false;
        return PLAINTEXT;
    }
    return ARG1;
}

// \UNDEF /////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the name given to \undef into ctx->arg1_buffer.
 * On '}' the named macro is removed; calls DIE() if it does not exist.
 */
enum state UNDEF_VALUE(char curr_char, parser_context *ctx) {
    string_t *arg1_buffer = ctx->arg1_buffer; // macro name to be undefined

    if (curr_char != '}') {
        string_append_char(arg1_buffer, curr_char);
        return ARG1;
    }

    // check if the given macro name exists
    macro *undef_this = list_retrieve(ctx->macro_ds, arg1_buffer->data);
    if (!undef_this) {
        // failed undef
        DIE("cannot undef %s", arg1_buffer->data);
    }

    // defined macro exists --> remove from array list
    list_remove(ctx->macro_ds, undef_this);
    string_clear(arg1_buffer); // CLEANUP
    string_clear(ctx->macro_buffer);
    return PLAINTEXT;
}

// \IF ///////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the condition of \if. An empty condition
 * selects ELSE, anything else selects THEN once the matching '}' is
 * seen. The condition text itself is not stored.
 */
enum state IF_COND(char curr_char, parser_context *ctx) {
    // here, even if false, we read until the end of arg!
    int balance = check_brace_balance(curr_char, false, true);
    static bool first_char = true;

    if (curr_char == '}' && first_char) {
        ctx->ARG = ELSE;
        first_char = true;
        return ARG2; // false, go to the {ELSE} case
    }
    if (curr_char == '}' && balance == 0) {
        ctx->ARG = THEN;
        first_char = true; // RESET as we exit
        return ARG2;       // true and reached the end of the condition
    }

    first_char = false; // no longer looking for an empty condition
    return ARG3;
}

/**
 * Reads one character of the THEN argument of \if into arg2_buffer.
 * The choice between THEN and ELSE is made later in IF_ELSE. The
 * macro_buffer, comment_processed_buffer and ctx parameters are unused.
 * Calls DIE() if the argument does not start with '{'.
 */
enum state IF_THEN(char curr_char, string_t *arg2_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx) {
    (void)macro_buffer;
    (void)comment_processed_buffer;
    (void)ctx;

    int balance = check_brace_balance(curr_char, false, false);
    static bool first_char = true; // don't write opening brace, but allow thereafter

    if (first_char && curr_char != '{') {
        // EDGE CASE: missing argument
        DIE("Killed in IF_THEN - %s", "missing then statement");
    }

    if (curr_char == '}' && balance == 0) {
        check_brace_balance(curr_char, true, false);
        first_char = true; // reset
        return ARG1;
    }

    if (curr_char == '{' && first_char) {
        first_char = false;
        return ARG2; // don't write opening brace, but allow thereafter (ie nested if)
    }

    first_char = false;
    string_append_char(arg2_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the ELSE argument of \if into arg3_buffer
 * (macro_process_text() passes ctx->arg1_buffer for it). On the matching
 * '}' the branch selected in ctx->ARG is pushed back onto
 * comment_processed_buffer and the buffers are cleared.
 * Calls DIE() if the argument does not start with '{'.
 */
enum state IF_ELSE(char curr_char, string_t *arg3_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false);
    string_t *arg2_buffer = ctx->arg2_buffer;
    static bool first_char_arg1 = true; // don't write opening brace, but allow thereafter

    if (first_char_arg1 && curr_char != '{') {
        DIE("Killed in IF_ELSE - %s", "missing else statement");
    }

    if (curr_char == '}' && balance == 0) {
        // we store both arguments but select which to output
        if (ctx->ARG == ELSE) {
            string_push_reversed(comment_processed_buffer, arg3_buffer);
        } else if (ctx->ARG == THEN) {
            string_push_reversed(comment_processed_buffer, arg2_buffer);
        }

        // cleanup
        string_clear(macro_buffer);
        string_clear(arg2_buffer);
        string_clear(arg3_buffer);
        ctx->ARG = NONE;
        check_brace_balance(curr_char, true, false);
        first_char_arg1 = true;
        return PLAINTEXT;
    }

    if (curr_char == '{' && first_char_arg1) {
        first_char_arg1 = false;
        return ARG1; // don't write opening brace, but allow thereafter (ie nested if)
    }

    first_char_arg1 = false;
    string_append_char(arg3_buffer, curr_char);
    return ARG1;
}

// \IFDEF ///////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the macro name given to \ifdef into
 * ctx->arg1_buffer. On '}' ctx->ARG becomes THEN if the name is defined
 * and ELSE otherwise. Calls DIE() on an empty name or a non-alphanumeric
 * character.
 */
enum state IFDEF_COND(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, true);
    static bool first_char = true;

    // EX: /ifdef{wro}ng} but allow /ifdef{wrong} (closing brace accepted)
    if ((isalnum((unsigned char)curr_char) == 0 && balance != 0) ||
        (curr_char == '}' && first_char)) { // empty COND argument
        DIE("%c is not alphanumeric", curr_char);
    }

    if (curr_char == '}') {
        if (list_retrieve(ctx->macro_ds, ctx->arg1_buffer->data)) {
            ctx->ARG = THEN;
        } else {
            // not defined macro
            ctx->ARG = ELSE;
        }
        // re-arm the empty-name check for the next \ifdef
        first_char = true;
        return ARG2;
    }

    string_append_char(ctx->arg1_buffer, curr_char);
    first_char = false;
    return ARG3;
}

/**
 * Reads one character of the THEN argument of \ifdef into
 * ctx->arg2_buffer. Calls DIE() if the argument does not start with '{'.
 */
enum state IFDEF_THEN(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false);
    static bool first_char = true;

    if (first_char && curr_char != '{') {
        // EDGE CASE: missing argument
        DIE("Killed in IF_THEN - %s", "missing then statement");
    } else if (first_char && curr_char == '{') {
        // EDGE CASE: nested statements -- don't write opening brace
        first_char = false;
        return ARG2;
    }

    // read in ARG2 but handle logic in the ELSE
    if (curr_char == '}' && balance == 0) {
        check_brace_balance(curr_char, true, false);
        first_char = true;
        return ARG1;
    }

    first_char = false;
    string_append_char(ctx->arg2_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the ELSE argument of \ifdef into
 * ctx->arg3_buffer. On the matching '}' the branch selected in ctx->ARG
 * is pushed back onto the stack and the buffers are cleared.
 * Calls DIE() if the argument does not start with '{'.
 */
enum state IFDEF_ELSE(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false);
    static bool first_char = true;

    if (first_char && curr_char != '{') {
        DIE("Killed in IF_ELSE - %s", "missing else statement");
    } else if (first_char && curr_char == '{') {
        first_char = false;
        return ARG1; // don't write opening brace
    }

    if (curr_char == '}' && balance == 0) {
        // store both arguments but select which to push (dep on conditional)
        if (ctx->ARG == ELSE) {
            string_push_reversed(ctx->comment_processed_buffer, ctx->arg3_buffer);
        } else if (ctx->ARG == THEN) {
            string_push_reversed(ctx->comment_processed_buffer, ctx->arg2_buffer);
        }

        string_clear(ctx->macro_buffer);
        string_clear(ctx->arg2_buffer);
        string_clear(ctx->arg3_buffer);
        string_clear(ctx->arg1_buffer);
        ctx->ARG = NONE;
        check_brace_balance(curr_char, true, false);
        first_char = true;
        return PLAINTEXT;
    }

    first_char = false;
    string_append_char(ctx->arg3_buffer, curr_char);
    return ARG1;
}

// \INCLUDE ///////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the path given to \include into
 * ctx->arg1_buffer. On '}' the file is read and its contents are pushed
 * back onto the stack. Calls DIE() if the file cannot be read.
 */
enum state INCLUDE_PATH(char curr_char, parser_context *ctx) {
    if (curr_char != '}') {
        string_append_char(ctx->arg1_buffer, curr_char);
        return ARG1;
    }

    // read file and replace macro with contents
    string_t *content = read_file(ctx->arg1_buffer->data);
    if (!content) {
        DIE("cannot read %s", ctx->arg1_buffer->data);
    }

    // push string of file back to buffer
    string_push_reversed(ctx->comment_processed_buffer, content);
    string_destroy(content);
    string_clear(ctx->arg1_buffer);
    string_clear(ctx->macro_buffer);
    return PLAINTEXT;
}

/**
 * Reads the whole file at path into a new string.
 *
 * @return the contents (caller frees with string_destroy()), or NULL if
 *         the file cannot be opened, sized or fully read
 */
string_t *read_file(const char *path) {
    FILE *file = fopen(path, "r");
    if (!file) {
        return NULL;
    }

    // move cursor to end of file to learn its size
    if (fseek(file, 0, SEEK_END) != 0) {
        fclose(file);
        return NULL;
    }
    long file_size = ftell(file);
    if (file_size < 0) {
        fclose(file);
        return NULL;
    }
    rewind(file); // go back and prepare to read

    string_t *content = string_buffer_create();
    if (!content) {
        fclose(file);
        return NULL;
    }

    // ensure string buffer is big enough
    size_t wanted = (size_t)file_size;
    while (content->capacity < wanted + 1) {
        string_grow(content);
    }

    size_t bytes_read = fread(content->data, 1, wanted, file);
    fclose(file);
    if (bytes_read != wanted) {
        // discrepancy in read --> error
        string_destroy(content);
        return NULL;
    }

    // null terminate and update size
    content->data[wanted] = '\0';
    content->size = wanted;
    return content;
}

// \EXPANDAFTER ///////////////////////////////////////////////////////////////////////////////

/**
 * Reads one character of the BEFORE argument of \expandafter into
 * ctx->expaf_before_buffer (the opening brace was consumed in
 * ESCAPE_CHAR). The text is only stored here.
 */
enum state EXPANDAFTER_BEFORE(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, true);

    if (curr_char == '}' && balance == 0) {
        // read and store only -- don't append to anything yet
        return ARG1;
    }
    string_append_char(ctx->expaf_before_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the AFTER argument of \expandafter into
 * ctx->expaf_after_buffer. On the matching '}' the AFTER text is run
 * through expansion_stack(), then BEFORE + AFTER is pushed back onto the
 * main stack. Calls DIE() if the argument does not start with '{'.
 */
enum state EXPANDAFTER_AFTER(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false);
    static bool first_char = true;

    if (first_char && curr_char != '{') {
        DIE("Killed in EXPANDAFTER_AFTER - %s", "missing after statement");
    } else if (first_char && curr_char == '{') {
        first_char = false;
        return ARG1; // don't write opening brace
    }

    if (curr_char == '}' && balance == 0) {
        // reverse and push AFTER arg --> expand
        string_reverse(ctx->expaf_after_buffer);
        string_clear(ctx->macro_buffer); // clear for use in recursive call
        // NOTE(review): expansion_stack() returns only once
        // expaf_after_buffer->size is 0, and the handlers write plaintext
        // to ctx->plaintext_buffer and expansions to
        // ctx->comment_processed_buffer, so the AFTER text appended below
        // appears to be empty -- confirm this is the intended behavior.
        expansion_stack(ctx, ctx->expaf_after_buffer);

        // concatenate BEFORE + AFTER
        string_append_string(ctx->arg1_buffer, ctx->expaf_before_buffer); // BEFORE
        string_append_string(ctx->arg1_buffer, ctx->expaf_after_buffer);  // AFTER

        // push processed /expandafter to our main stack
        string_push_reversed(ctx->comment_processed_buffer, ctx->arg1_buffer);

        // cleanup
        string_clear(ctx->macro_buffer);
        string_clear(ctx->arg1_buffer);         // total expandafter expansion
        string_clear(ctx->expaf_before_buffer); // before expansion
        string_clear(ctx->expaf_after_buffer);  // recursed after expansion
        check_brace_balance(curr_char, true, false);
        first_char = true;
        return PLAINTEXT;
    }

    first_char = false;
    string_append_char(ctx->expaf_after_buffer, curr_char);
    return ARG1;
}

//---------------------------------------------------------
// state machine (driver function)
//---------------------------------------------------------

/**
 * Consumes one character in the given state and returns the next state.
 *
 * PLAINTEXT copies characters to the output until a backslash;
 * ESCAPE_CHAR collects a macro name up to '{' and routes to the first
 * argument state of that macro; ARG3/ARG2/ARG1 dispatch to the handler
 * of the macro named in ctx->macro_buffer.
 *
 * @param curr_char  character popped from the stack
 * @param curr_state state returned by the previous call
 * @param ctx        shared parser context
 */
enum state macro_process_text(char curr_char, enum state curr_state, parser_context *ctx) {
    // true while a macro name is being collected in ESCAPE_CHAR
    static bool startedName = false;

    // local shortcuts (from refactor)
    string_t *plaintext_buffer = ctx->plaintext_buffer;
    string_t *macro_buffer = ctx->macro_buffer;
    array_list *macro_ds = ctx->macro_ds;
    macro *m = ctx->currentMacro;

    switch (curr_state) {
    case PLAINTEXT:
        if (curr_char == '\\') {
            return ESCAPE_CHAR;
        }
        // store in plaintext buffer and pop from stack
        string_append_char(plaintext_buffer, curr_char);
        return PLAINTEXT;

    case ESCAPE_CHAR:
        /**
         * \ + whitespace + macro -> plaintext
         * \MacroName {}          -> DIE
         * \NoArg                 -> DIE
         */
        if (curr_char == '%') {
            // EX: we don't want an error if escaped comment -> \%
            string_append_char(plaintext_buffer, '%');
            startedName = false;
            string_clear(macro_buffer);
            return PLAINTEXT;
        } else if (isspace((unsigned char)curr_char)) {
            if (!startedName) {
                // white space after escape --> treat as plaintext
                return PLAINTEXT;
            }
            // whitespace in the middle of a macro name --> kill
            DIE("expected {, found '%c'", curr_char);
        } else if (curr_char == '{') {
            // start reading
            if (!startedName && macro_buffer->size == 0) {
                DIE("No macro name given before %c", curr_char); // empty macro name
            }
            // the name is complete; re-arm for the next escape sequence
            startedName = false;

            // Now route to the appropriate macro case
            if (strcmp(macro_buffer->data, "def") == 0) {
                macro *new_macro = macro_create();
                list_append(macro_ds, new_macro); // backend setup, parse macro name now
                return ARG2;
            } else if (strcmp(macro_buffer->data, "undef") == 0) {
                return ARG1;
            } else if (strcmp(macro_buffer->data, "if") == 0) {
                return ARG3;
            } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
                return ARG3;
            } else if (strcmp(macro_buffer->data, "include") == 0) {
                return ARG1;
            } else if (strcmp(macro_buffer->data, "expandafter") == 0) {
                return ARG2;
            }

            // search for user defined macro
            macro *found = list_retrieve(macro_ds, macro_buffer->data);
            if (!found) {
                DIE("%s not defined", macro_buffer->data);
            } else {
                ctx->currentMacro = found;
                if (!found->has_pound) {
                    // no '#': the argument is skipped and the value is
                    // pushed back onto the stack by USERDEF_IGNORE
                    ctx->skip_arg = true; // activate switch
                }
                return ARG1;
            }
        } else {
            // otherwise non-whitespace, non-brace => part of macro name
            string_append_char(macro_buffer, curr_char);
            startedName = true; // started reading name
        }
        return ESCAPE_CHAR;

    case ARG3:
        if (strcmp(macro_buffer->data, "if") == 0) {
            return IF_COND(curr_char, ctx);
        } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
            return IFDEF_COND(curr_char, ctx);
        }
        DIE("Did not enter any cases in ARG3. %s", "ERROR.");
        return ERROR;

    case ARG2: // store name
        if (strcmp(macro_buffer->data, "def") == 0) {
            return DEF_NAME(macro_ds, curr_char, ctx);
        } else if (strcmp(macro_buffer->data, "if") == 0) {
            return IF_THEN(curr_char, ctx->arg2_buffer, macro_buffer, ctx->comment_processed_buffer, ctx);
        } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
            return IFDEF_THEN(curr_char, ctx);
        } else if (strcmp(macro_buffer->data, "expandafter") == 0) {
            return EXPANDAFTER_BEFORE(curr_char, ctx);
        }
        DIE("Did not enter any cases in ARG2. %s", "ERROR.");
        return ARG1;

    case ARG1: // store value
        if (strcmp(macro_buffer->data, "def") == 0) {
            return DEF_VALUE(curr_char, curr_state, macro_ds, macro_buffer);
        } else if (strcmp(macro_buffer->data, "undef") == 0) {
            return UNDEF_VALUE(curr_char, ctx);
        } else if (ctx->skip_arg) {
            // no pound present in \def, so skip the arguments if any
            return USERDEF_IGNORE(curr_char, ctx);
        } else if (strcmp(macro_buffer->data, "if") == 0) {
            return IF_ELSE(curr_char, ctx->arg1_buffer, macro_buffer, ctx->comment_processed_buffer, ctx);
        } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
            return IFDEF_ELSE(curr_char, ctx);
        } else if (strcmp(macro_buffer->data, "include") == 0) {
            return INCLUDE_PATH(curr_char, ctx);
        } else if (strcmp(macro_buffer->data, "expandafter") == 0) {
            return EXPANDAFTER_AFTER(curr_char, ctx);
        }
        // default: user-defined macro with pound
        return USERDEF_VALUE(curr_char, m, ctx);

    case PROCESSED:
        return PROCESSED;

    default:
        return ERROR;
    }
}
Leave a Comment