
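// NOTE: stray page-listing metadata captured with this file ("14 days ago", "43 kB");
// it is not part of the program.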
#include "proj1.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// ~3 hrs 45 mins to complete String Library
// ~1 hr 45 min to introduce skeleton and initial state

/*
 * States of the macro-expansion state machine (see macro_process_text()).
 * NOTE(review): ESCAPE_CHAR is restored because the code returns/handles it;
 * confirm no further enumerators were lost from this declaration.
 */
enum state {PLAINTEXT,        // ordinary text is copied to the output
            ARG1, ARG2, ARG3, // macro arguments
            ESCAPE_CHAR       // a backslash was read; a macro name may follow
};

/* Which branch of an \if / \ifdef was selected by the condition. */
enum arg   {THEN, // for if/ifdef statements
            ELSE, // condition was false / macro not defined
            NONE  // default state
};

// for string library
// Growable character buffer; the string_* helpers keep data NUL-terminated.
typedef struct {
    char *data;      // heap storage for the characters plus terminator
    size_t size;     // number of characters stored, excluding the terminator
    size_t capacity; // number of bytes allocated for data
} string_t;

// One user-defined macro: its name and its replacement text.
typedef struct {
    string_t *name;  // macro name (filled in by DEF_NAME)
    string_t *value; // replacement text (filled in by DEF_VALUE)
    bool has_pound; // for arg substitution: set when the value holds an unescaped '#'
} macro; 

// Growable array of macro pointers, searched linearly by name.
typedef struct {
    macro **data; // arr list of macros
    size_t size;     // number of macros currently stored
    size_t capacity; // number of slots allocated in data
} array_list;

// Parser state shared by main(), expansion_stack() and the state handlers.
typedef struct {
    string_t *comment_processed_buffer; // comment-stripped input; also the stack that expansions are pushed back onto
    string_t *plaintext_buffer; // fully processed output text (printed by main)
    string_t *macro_buffer; // name of the macro currently being read / dispatched
    string_t *arg1_buffer; // reuse for expandafter
    string_t *arg2_buffer;
    string_t *arg3_buffer;
    string_t *expaf_before_buffer; // arg buffers will be utilized during recursion, so store before/after separately
    string_t *expaf_after_buffer; // ^^
    array_list *macro_ds; // currently defined macros
    macro *currentMacro; // macro most recently looked up for expansion (set in USERDEF_IGNORE)
    bool skip_arg; // for the \def macro => skip arg if there is no pound
    enum arg ARG; // branch chosen by \if / \ifdef (THEN or ELSE); NONE otherwise
} parser_context;

// data structures and prototypes
// forward declare for opaque struct
// typedef struct string_t string_t;

/**
 * Creates a typed string, populating the data, size and capacity fields.
 * @param str input characters to copy into the new string
 */
string_t *string_create_from_string(const char *str);

/**
 * Creates an empty output buffer, populating size and capacity for future
 * reallocs.
 */
string_t *string_buffer_create();

/**
 * Destroys a string_t object.
 * @param str string to destroy
 */
void string_destroy(string_t *str);

/**
 * Appends a character to the end of a given string.
 * @param str pointer to a string object
 * @param c character to add
 */
void string_append_char(string_t *str, const char c);

/**
 * Appends a given string (added_str) to the end of the first string
 * given in the args.
 * @param base_str ptr to base string
 * @param added_str ptr to string to be added
 */
void string_append_string(string_t *base_str, const string_t *added_str);

void string_grow(string_t* str);

void string_pop_char(string_t *str, const char c);

void string_reverse(string_t *str);

/**
 * Called when exiting a macro expansion. 'Resets' the buffer by
 * setting its size to 0 and writing a terminator at index 0, so later
 * appends overwrite the previous data and strcmp() on macro names never
 * sees stale characters.
 */
void string_clear(string_t *macro_buffer);

void string_remove_newline(string_t *str);

/**
 * Pushes the reversed contents of `src` onto the end of `dest`,
 * i.e. if src = "Hello", we append "olleH" to `dest`.
 */
void string_push_reversed(string_t *dest, const string_t *src);

/* State machine: processes one character and returns the next state. */
enum state macro_process_text(char curr_char, enum state curr_state, parser_context *ctx);

// macro data structure
macro *macro_create(void);
/**
 * Allocates a macro with empty name and value buffers and has_pound cleared.
 * Calls DIE() (from proj1.h) if any allocation fails.
 * @return pointer to the newly allocated macro
 */
macro *macro_create(void) {
    macro *this_macro = malloc(sizeof *this_macro);
    if (!this_macro) {
        DIE("Failed malloc. %s", "END");
    }

    // allocating fields
    this_macro->name = string_buffer_create();
    this_macro->value = string_buffer_create();
    this_macro->has_pound = false;
    if (!this_macro->name || !this_macro->value) {
        DIE("Failed to create string buffers for macro fields. %s", "END");
    }
    return this_macro;
}
// array list prototypes
array_list *list_create(void);
void list_destroy(array_list *list);
void list_append(array_list *list, macro *new_macro);
void list_grow(array_list *list);
void list_remove(array_list *list, macro *new_macro);
macro *list_retrieve(array_list *list, const char *macro_name);

/* States of the comment-stripping pass (see process_comments()). */
enum comment_state {READING,    // copying input to the buffer
                    COMMENT,    // inside a '%' comment, up to the newline
                    BLANK_SKIP  // skipping whitespace that follows a comment's newline
};
void process_comments(FILE *infile, string_t *buffer);

// context prototypes
parser_context *context_create(void);
parser_context *context_create(void) {
    parser_context *context = malloc(sizeof(parser_context));
    if (!context) {
        DIE("Failed Malloc. %s", "ERROR");

    // init buffers and list
    context->comment_processed_buffer = string_buffer_create();
    context->plaintext_buffer = string_buffer_create();
    context->macro_buffer = string_buffer_create();
    context->arg1_buffer = string_buffer_create();
    context->arg2_buffer = string_buffer_create();
    context->arg3_buffer = string_buffer_create();
    context->expaf_before_buffer = string_buffer_create();
    context->expaf_after_buffer = string_buffer_create();
    context->macro_ds = list_create();
    context->skip_arg = false;
    context->ARG = NONE;
    // error check
    if (!context->comment_processed_buffer || !context->plaintext_buffer || 
        !context->macro_buffer || !context->arg1_buffer || 
        !context->arg2_buffer || !context->arg3_buffer || 
        !context->expaf_before_buffer || !context->expaf_after_buffer
        || !context->macro_ds) {
        DIE("Failed to initialize buffers or list. %s", "ERROR");

    return context;

void context_destroy(parser_context *context);
/**
 * Releases a parser context together with all of its buffers and its macro
 * list. A NULL context is ignored.
 * @param context context to destroy (may be NULL)
 */
void context_destroy(parser_context *context) {
    if (!context) {
        return;
    }

    if (context->comment_processed_buffer) {
        string_destroy(context->comment_processed_buffer);
    }
    if (context->plaintext_buffer) {
        string_destroy(context->plaintext_buffer);
    }
    if (context->macro_buffer) {
        string_destroy(context->macro_buffer);
    }
    if (context->arg1_buffer) {
        string_destroy(context->arg1_buffer);
    }
    if (context->arg2_buffer) {
        string_destroy(context->arg2_buffer);
    }
    if (context->arg3_buffer) {
        string_destroy(context->arg3_buffer);
    }
    if (context->expaf_before_buffer) {
        string_destroy(context->expaf_before_buffer);
    }
    if (context->expaf_after_buffer) {
        string_destroy(context->expaf_after_buffer);
    }
    if (context->macro_ds) {
        list_destroy(context->macro_ds);
    }
    // currentMacro is not freed here: it is only ever assigned from list_retrieve().
    free(context);
}

bool special_char_escaped(char curr_char, bool reset);
int check_brace_balance(char curr_char, bool reset, bool add_one_openbrace);

enum state DEF_NAME(array_list *macro_ds, char curr_char, parser_context *ctx);
enum state DEF_VALUE(char curr_char, enum state curr_state, array_list *macro_ds, string_t *macro_buffer);

enum state UNDEF_VALUE(char curr_char, parser_context *ctx);
void macro_substitute_arg(const macro *m, const string_t *arg, parser_context *ctx);

enum state USERDEF_VALUE(char curr_char, const macro *m, parser_context *ctx);
enum state USERDEF_IGNORE(char curr_char, parser_context *ctx); // EX: /jake{} => no pound so ignore everything after the '{' up until '}'

enum state IF_THEN(char curr_char, string_t *arg2_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx);
enum state IF_ELSE(char curr_char, string_t* arg3_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx);
enum state IF_COND(char curr_char, parser_context *ctx);

enum state IFDEF_COND(char curr_char, parser_context *ctx);
enum state IFDEF_THEN(char curr_char, parser_context *ctx);
enum state IFDEF_ELSE(char curr_char, parser_context *ctx);

string_t *read_file(const char *path);
enum state INCLUDE_PATH(char curr_char, parser_context *ctx);

void expansion_stack(parser_context *ctx, string_t *source_buffer); 
enum state EXPANDAFTER_BEFORE(char curr_char, parser_context *ctx);
enum state EXPANDAFTER_AFTER(char curr_char, parser_context *ctx);

// main
// PHASE II: Think of what states you'll need to have + implement basic character scanning
// static void print_all_macros(const array_list *list) {
//     if (!list) {
//         printf("(macro_ds is NULL)\n");
//         return;
//     }
//     printf("\n=== Macro Database Contents ===\n");
//     for (size_t i = 0; i < list->size; i++) {
//         macro *m = &list->data[i];
//         printf("Macro %zu: name=\"%s\", value=\"%s\"\n",
//                i,
//                m->name->data ? m->name->data : "(null)",
//                m->value->data ? m->value->data : "(null)");
//     }
//     printf("=== End of Macro Database ===\n\n");
// }

int main(int argc, char *argv[]) {
    parser_context *ctx = context_create();
    string_t *comment_processed_buffer = ctx->comment_processed_buffer;
    string_t *plaintext_buffer = ctx->plaintext_buffer;

    // INPUT CASE #1: read from stdin with indirect
    if (argc == 1) {
        // read from stdin
        process_comments(stdin, comment_processed_buffer);
    // INPUT CASE #2: read from specified files (can read multiple)
    else {
        for (size_t i = 1; i < argc; i++) {
            FILE *infile = fopen(argv[i], "r");
            if (infile == NULL) {
                DIE("%s is not a valid file input. Please try again.", argv[i]);
            process_comments(infile, comment_processed_buffer);
            // separate files?
            // string_append_char(comment_processed_buffer, '\n');

    // reverse-string stack approach: 

    // run expansions
    expansion_stack(ctx, comment_processed_buffer);

    // print
    printf("%s", plaintext_buffer->data);

    // free

    return 0;

// expansion logic
/**
 * Drains source_buffer as a stack: repeatedly pops its last character and
 * feeds it to macro_process_text(). The size is re-read on every iteration,
 * so characters appended to source_buffer while processing are popped next.
 * @param ctx parser context passed to the state machine
 * @param source_buffer buffer holding the text in reverse order
 */
void expansion_stack(parser_context *ctx, string_t *source_buffer) {
    enum state curr_state = PLAINTEXT;

    while (source_buffer->size > 0) {
        // pop: shrink first so the handler may push on top of what remains
        source_buffer->size--;
        char c = source_buffer->data[source_buffer->size];
        curr_state = macro_process_text(c, curr_state, ctx);
    }
}
// string library
/**
 * Creates a string_t holding a copy of a C string.
 * @param string NUL-terminated input to copy
 * @return the new string, or NULL if string is NULL or allocation fails
 */
string_t *string_create_from_string(const char *string) {
    if (string == NULL) {
        return NULL;
    }

    string_t *new_str = malloc(sizeof *new_str);
    if (!new_str) {
        return NULL;
    }

    size_t str_len = strlen(string);
    new_str->size = str_len;
    new_str->capacity = str_len + 1; // include null terminator

    new_str->data = malloc(new_str->capacity);
    if (!new_str->data) {
        free(new_str); // do not leak the header when the payload fails
        return NULL;
    }
    memcpy(new_str->data, string, str_len + 1);

    return new_str;
}

/**
 * Creates an empty string buffer with an initial capacity of 250 bytes.
 * @return the new buffer (an empty, NUL-terminated string), or NULL if
 *         allocation fails
 */
string_t *string_buffer_create(void) {
    // large init size for efficient starting threshold
    size_t init_size = 250;

    string_t *buffer = malloc(sizeof *buffer);
    if (!buffer) {
        return NULL;
    }

    buffer->size = 0;
    buffer->capacity = init_size;
    buffer->data = malloc(buffer->capacity);
    if (!buffer->data) {
        free(buffer); // do not leak the header when the payload fails
        return NULL;
    }

    // init buffer with null terminator --> empty string
    buffer->data[0] = '\0';

    return buffer;
}

/**
 * Frees a string's character storage and the string object itself.
 * @param str string to destroy; NULL is ignored
 */
void string_destroy(string_t *str) {
    if (str) {
        free(str->data); // free string
        free(str);       // free the struct that owned it
    }
}

/**
 * Enlarges a string's storage using the DOUBLE() macro from proj1.h
 * (assumed to reallocate the block and update the capacity - TODO confirm).
 * Calls DIE() if the reallocation fails.
 * @param str string whose storage should grow
 */
void string_grow(string_t* str) {
    // Keep the result in a temporary so a failure neither loses the old
    // block nor hands a NULL pointer to the "%s" in the message below.
    char *grown = DOUBLE(str->data, str->capacity);
    if (!grown) {
        DIE("Double failed in string_put_char(): %s", "ERROR");
    }
    str->data = grown;
}

/**
 * Appends one character to a string, growing the storage when needed and
 * keeping the data NUL-terminated.
 * @param str string to extend
 * @param c character to append
 */
void string_append_char(string_t *str, const char c) {
    size_t new_arr_len = str->size + 2; // new char + null terminator

    // grow array if capacity is less than new
    if (str->capacity < new_arr_len) {
        string_grow(str);
    }

    // add char and null terminator to string
    str->data[str->size] = c;
    str->data[str->size + 1] = '\0';
    str->size++;
}

/**
 * Appends the contents of added_str to base_str, growing base_str as often
 * as necessary and keeping it NUL-terminated.
 * @param base_str string to extend
 * @param added_str string whose characters are appended
 */
void string_append_string(string_t *base_str, const string_t *added_str) {
    size_t added_len = added_str->size;
    size_t new_arr_len = base_str->size + added_len + 1; // + null terminator

    // The string to append may need more than one doubling. The live
    // capacity is re-read each time: a cached copy would never change and
    // the loop would spin until allocation fails.
    while (base_str->capacity < new_arr_len) {
        string_grow(base_str);
    }

    // concatenate
    memcpy(base_str->data + base_str->size, added_str->data, added_len);

    // null-terminate and update size
    base_str->data[new_arr_len - 1] = '\0';
    base_str->size += added_len;
}

/**
 * Removes the last character of a string. Calls DIE() on an empty string.
 * Storage is halved once the string uses less than a quarter of it.
 * @param str string to shorten
 * @param c unused; kept for interface compatibility
 */
void string_pop_char(string_t *str, const char c) {
    (void)c;

    if (str->size == 0) {
        DIE("Trying to pop an empty string!: %zu", str->size);
    }

    // decrement size and use as index access point
    str->size--;
    str->data[str->size] = '\0';

    // automatic shrink; a failed shrink is harmless, so keep the old block
    if (str->size < str->capacity / 4) {
        size_t shrunk_capacity = str->capacity / 2;
        char *shrunk = realloc(str->data, shrunk_capacity);
        if (shrunk) {
            str->data = shrunk;
            str->capacity = shrunk_capacity;
        }
    }
}

/**
 * Reverses a string's characters in place.
 * @param str string to reverse
 */
void string_reverse(string_t *str) {
    // Nothing to swap; this also prevents `size - 1` from wrapping around
    // for an empty string (size_t is unsigned).
    if (str->size < 2) {
        return;
    }

    size_t start = 0;
    size_t end = str->size - 1;
    while (start < end) {
        // swap
        char temp = str->data[start];
        str->data[start] = str->data[end];
        str->data[end] = temp;
        start++;
        end--;
    }
}


/**
 * Empties a string without releasing its storage: the size becomes 0 and a
 * terminator is written at index 0.
 * @param str string to clear; NULL or storage-less strings are ignored
 */
void string_clear(string_t *str) {
    if (!str || !str->data) {
        return;
    }
    str->size = 0;
    str->data[0] = '\0';
}

/**
 * Drops a single trailing newline from a string, if present.
 * Calls DIE() when str is NULL.
 * @param str string to trim
 */
void string_remove_newline(string_t *str) {
    if (str == NULL) {
        DIE("NULL buffer. %s", "ERROR");
    }

    size_t len = strlen(str->data);
    if (len > 0 && str->data[len - 1] == '\n') {
        str->data[len - 1] = '\0'; // replace the newline with a null terminator
        str->size = len - 1;       // keep size in step so later appends land correctly
    }
}


/**
 * Appends the characters of src to dest in reverse order, so that popping
 * dest from its end yields src in its original order.
 * @param dest string used as a stack
 * @param src text to push
 */
void string_push_reversed(string_t *dest, const string_t *src) {
    // from size down to 1 (the index is i - 1; an unsigned i never goes below 0)
    for (size_t i = src->size; i > 0; i--) {
        string_append_char(dest, src->data[i - 1]);
    }
}
// array list library
/**
 * Creates an empty macro list with room for 10 entries.
 * Calls DIE() if allocation fails.
 * @return the new list
 */
array_list *list_create(void) {
    array_list *list = malloc(sizeof *list);
    if (!list) {
        DIE("Failed malloc. %s", "END");
    }

    // init. capacity of macro array list
    size_t init_capacity = 10;

    // init. fields
    list->data = malloc(sizeof *list->data * init_capacity);
    if (!list->data) {
        DIE("Failed malloc. %s", "END");
    }
    list->size = 0;
    list->capacity = init_capacity;

    return list;
}

/**
 * Frees a macro list: every stored macro (name, value and the macro
 * itself), the pointer array and the list object.
 * Calls DIE() when list is NULL.
 * @param list list to destroy
 */
void list_destroy(array_list *list) {
    if (!list) DIE("NULL list in list_destroy. %s", "END");

    // free each macro's fields in array list
    for (size_t i = 0; i < list->size; i++) {
        macro *m = list->data[i];
        if (m) {
            string_destroy(m->name);
            string_destroy(m->value);
            free(m);
        }
    }

    free(list->data);
    free(list);
}

/**
 * Enlarges a list's pointer array using the DOUBLE() macro from proj1.h
 * (assumed to reallocate the block and update the capacity - TODO confirm).
 * Calls DIE() if the reallocation fails.
 * @param list list whose storage should grow
 */
void list_grow(array_list *list) {
    // temporary: do not lose the old array if the reallocation fails
    macro **grown = DOUBLE(list->data, list->capacity);
    if (!grown) {
        DIE("Double failed in string_put_char(): %s", "ERROR");
    }
    list->data = grown;
}

/**
 * Appends a macro pointer to the list, growing the list when it is full.
 * The pointer is stored as-is (no copy is made).
 * @param list destination list; a NULL list is reported via WARN() and ignored
 * @param new_macro macro to store
 */
void list_append(array_list *list, macro *new_macro) {
    if (!list) {
        WARN("Invalid list passed in. %s", "FAILED");
        return; // nothing to append to; do not dereference NULL below
    }

    // check capacity
    if (list->size == list->capacity) {
        list_grow(list);
    }

    // store the macro
    list->data[list->size] = new_macro;
    list->size++;
}

/**
 * Removes and frees the first macro whose name equals remove_macro's name.
 * Element order is not preserved: the last macro is moved into the gap.
 * Does nothing when no macro matches.
 * @param list list to modify
 * @param remove_macro macro supplying the name to look for; if it is the
 *        stored macro itself it is freed and must not be used afterwards
 */
void list_remove(array_list *list, macro *remove_macro) {
    // linear search
    for (size_t i = 0; i < list->size; i++) {
        if (strcmp(list->data[i]->name->data, remove_macro->name->data) == 0) {
            // free macro fields in arr
            macro *victim = list->data[i];
            string_destroy(victim->name);
            string_destroy(victim->value);
            free(victim);

            // avoid costly shifting by replacing deleted w/ last macro
            if (i != list->size - 1) {
                list->data[i] = list->data[list->size - 1];
            }
            list->size--;
            return; // remove_macro may have just been freed: stop comparing
        }
    }

    // macro was not found
}

/**
 * Looks up a macro by name with a linear search.
 * @param list list to search
 * @param macro_name NUL-terminated name to match exactly
 * @return the first macro with that name, or NULL when none matches
 */
macro *list_retrieve(array_list *list, const char *macro_name) {
    for (size_t i = 0; i < list->size; i++) {
        macro *m = list->data[i];
        if (strcmp(m->name->data, macro_name) == 0) {
            return m;
        }
    }
    // not found
    return NULL;
}

// comment processing state machine
/**
 * Copies infile into buffer with comments removed. A comment starts at an
 * unescaped '%' and runs through the end of its line; whitespace after that
 * newline (including further newlines) is skipped too, and a '%' met while
 * skipping starts another comment.
 * @param infile stream to read until EOF
 * @param buffer string that receives the comment-free text
 */
void process_comments(FILE *infile, string_t *buffer) {
    enum comment_state state = READING;
    int ch;         // int, not char: fgetc() must be able to report EOF distinctly
    bool unescaped; // true when the current character is not backslash-escaped

    while ((ch = fgetc(infile)) != EOF) {
        switch (state) {
            case READING:
                unescaped = special_char_escaped((char)ch, false);

                if (ch == '\n') {
                    // reset escape parity
                    special_char_escaped('\0', true);
                }

                // an unescaped '%' starts a comment; everything else is kept
                if (ch == '%' && unescaped) {
                    state = COMMENT;
                } else {
                    string_append_char(buffer, (char)ch);
                }
                break;

            case COMMENT:
                special_char_escaped((char)ch, false);
                if (ch == '\n') {
                    // "first non-blank, non-tab char following newline"
                    state = BLANK_SKIP;
                    // reset and transition to next line
                    special_char_escaped('\0', true);
                }
                break;

            case BLANK_SKIP:
                special_char_escaped((char)ch, false);
                if (ch == '\n') {
                    // a further newline is skipped as well
                } else if (ch == '%') {
                    state = COMMENT;
                } else if (!isspace((unsigned char)ch)) {
                    // found a char, resume processing
                    state = READING;
                    string_append_char(buffer, (char)ch);
                }
                break;
        }
    }
}

// state machine
 * Note: the third parameter is a flag for initializing balance to 1 
 * instead of 0. In cases where we  read the opening brace in ESCAPE_CHAR separately instead of the
 * state of the caller, we add one to equalize the balance counter.
int check_brace_balance(char curr_char, bool reset, bool add_one_openbrace) {
    // encapsulate brace balance check
    static int balance = 0;
    if (add_one_openbrace && balance == 0) {
        balance = 1; // if the front brace has already been read in a diff. state

    // reset when we leave the state to prep for next states
    if (reset) {
        balance = 0;  
    } else {
        if (curr_char == '{') {
        } else if (curr_char == '}') {

    return balance;

/**
 * Reports whether the current character is unescaped, based on the number
 * of consecutive backslashes seen so far (kept in a function-local static).
 *
 * @param curr_char character being processed
 * @param reset when true the backslash count is cleared and true is returned
 * @return for a backslash: true when the run of backslashes, including this
 *         one, has even length; for any other character: true when it was
 *         preceded by an even number of backslashes (i.e. it is unescaped)
 */
bool special_char_escaped(char curr_char, bool reset) {
    // persistence counter for consecutive escapes
    static int escape_count = 0;

    if (reset) {
        // reset and default
        escape_count = 0;
        return true;
    }

    if (curr_char == '\\') {
        escape_count++;
        return (escape_count % 2 == 0); // true for even parity, false for odd parity
    }

    bool isUnescaped = (escape_count % 2 == 0);
    escape_count = 0; // reset if a non-backslash character is encountered
    return isUnescaped;
}

// \DEF /////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the NAME argument of \def. Characters accumulate
 * in ctx->arg2_buffer; at the closing brace the name is copied into the
 * most recently appended macro. Calls DIE() for a non-alphanumeric
 * character or when a macro with that name already exists.
 * @param macro_ds macro list whose last element is the macro being defined
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG2 while the name is being read, ARG1 once it is complete
 */
enum state DEF_NAME(array_list *macro_ds, char curr_char, parser_context *ctx) {
    // subtract 1 to access recently appended macro
    size_t access_index = macro_ds->size - 1;
    macro *m = macro_ds->data[access_index]; // ptr to macro struct
    string_t *arg2_buffer = ctx->arg2_buffer; // intermediate buffer

    if (curr_char == '}') {
        // EDGE CASE: check if the user is trying to redefine a macro before undefining
        if (list_retrieve(macro_ds, arg2_buffer->data)) {
            DIE("cannot redefine %s", arg2_buffer->data);
        }
        string_append_string(m->name, arg2_buffer);
        // NOTE(review): clear restored here so the next \def starts with an
        // empty name buffer; harmless if the driver also clears it.
        string_clear(arg2_buffer);
        return ARG1; // read the value next
    }

    // not alphanumeric, throw error
    if (isalnum((unsigned char)curr_char) == 0) {
        DIE("%c is not alphanumeric", curr_char);
    }
    string_append_char(arg2_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the VALUE argument of \def and appends it to the
 * value of the most recently appended macro. The first character must be
 * '{' (otherwise DIE()); the balanced closing brace ends the definition.
 *
 * Braces are passed to check_brace_balance() twice per character (once on
 * entry and once in the branch below); opening and closing braces are
 * doubled alike, so a balance of zero still marks the matching brace.
 *
 * @param curr_char character being processed
 * @param curr_state unused
 * @param macro_ds macro list whose last element is the macro being defined
 * @param macro_buffer macro-name buffer, cleared when the definition ends
 * @return ARG1 while the value is being read, PLAINTEXT when it is complete
 */
enum state DEF_VALUE(char curr_char, enum state curr_state, array_list *macro_ds, string_t *macro_buffer) {
    (void)curr_state;

    static bool start = false; // becomes true once the opening brace was skipped
    bool unescaped = special_char_escaped(curr_char, false); // true if no escape char
    int balance = check_brace_balance(curr_char, false, false);

    if (start) {
        // check brace balance
        if (curr_char == '{' || curr_char == '}') {
            balance = check_brace_balance(curr_char, false, false);
        }

        // check escape characters
        if (curr_char == '#' && !unescaped) {
            return ARG1; // odd parity -> escaped pound so DO NOT append
        }

        // exit condition: balanced closing brace (not stored)
        if (curr_char == '}' && balance == 0) {
            check_brace_balance(curr_char, true, false); // reset
            special_char_escaped(curr_char, true);
            string_clear(macro_buffer); // reset macro_buffer for next iteration
            start = false;
            return PLAINTEXT;
        }

        // append value
        macro *m = macro_ds->data[macro_ds->size - 1];
        if (curr_char == '#' && unescaped) {
            m->has_pound = true;
        }
        string_append_char(m->value, curr_char);
        return ARG1;
    }

    // Only reached for the first character of the argument.
    if (curr_char == '{') {
        // skip the opening brace of value arg
        start = true;
        check_brace_balance(curr_char, false, false);
    } else {
        // if first char in DEF_VAL is not an opening brace, then semantic error
        DIE("expected {, found %c", curr_char); // Ex: space between macro args
    }
    return ARG1;
}

/**
 * Pushes a macro's value onto ctx->comment_processed_buffer in reverse
 * order, replacing every '#' in the value with the (reversed) argument, so
 * that popping the buffer yields the substituted text in reading order.
 * @param m macro whose value is expanded
 * @param arg argument text substituted for each '#'
 * @param ctx parser context owning the stack buffer
 */
void macro_substitute_arg(const macro *m, const string_t *arg, parser_context *ctx) {
    const char *val = m->value->data;

    for (size_t i = m->value->size; i > 0; i--) {
        char ch = val[i - 1];
        if (ch == '#') {
            // insert arg, last character first
            for (size_t j = arg->size; j > 0; j--) {
                string_append_char(ctx->comment_processed_buffer, arg->data[j - 1]);
            }
        } else {
            // normal char -- push as usual
            string_append_char(ctx->comment_processed_buffer, ch);
        }
    }
}

// USER DEFINED MACROS /////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the argument of a user-defined macro. Characters
 * accumulate in ctx->arg1_buffer; at '}' the macro value, with '#' replaced
 * by the argument, is pushed back onto the stack.
 * NOTE(review): '{' is skipped wherever it occurs and the first '}' ends
 * the argument, so nested braces in an argument are not balanced here.
 * @param curr_char character being processed
 * @param m macro being expanded
 * @param ctx parser context
 * @return ARG1 while the argument is being read, PLAINTEXT when done
 */
enum state USERDEF_VALUE(char curr_char, const macro *m, parser_context *ctx) {
    string_t *arg1_buffer = ctx->arg1_buffer;
    string_t *macro_buffer = ctx->macro_buffer;

    if (curr_char == '{') {
        // skip this char
    } else if (curr_char == '}') {
        // end of argument -> place back in stack
        macro_substitute_arg(m, arg1_buffer, ctx);
        // NOTE(review): clears restored so the next macro starts from empty
        // buffers; harmless if the driver also clears them.
        string_clear(arg1_buffer);
        string_clear(macro_buffer);
        return PLAINTEXT;
    } else {
        // accumulate chars into arg_buffer
        string_append_char(arg1_buffer, curr_char);
    }
    return ARG1;
}

/**
 * Consumes the argument of a user-defined macro whose value contains no
 * '#': everything up to the balanced closing brace is discarded, then the
 * macro's value is pushed back onto the stack unchanged.
 * @param curr_char character being processed
 * @param ctx parser context; ctx->macro_buffer holds the macro's name
 * @return ARG1 while the argument is being skipped, PLAINTEXT when done
 */
enum state USERDEF_IGNORE(char curr_char, parser_context *ctx) {
    // add-one flag: the opening brace was consumed before this handler ran
    int balance = check_brace_balance(curr_char, false, true);
    array_list *macro_ds = ctx->macro_ds;
    string_t *macro_buffer = ctx->macro_buffer;

    // exit condition
    if (curr_char == '}' && balance == 0) {
        ctx->currentMacro = list_retrieve(macro_ds, macro_buffer->data);
        macro *m = ctx->currentMacro;
        string_push_reversed(ctx->comment_processed_buffer, m->value);

        // reset!!
        check_brace_balance(curr_char, true, true);
        special_char_escaped(curr_char, true);
        ctx->skip_arg = false;
        // NOTE(review): clear restored (name no longer needed after the
        // lookup); harmless if the driver also clears it.
        string_clear(macro_buffer);

        return PLAINTEXT;
    }
    return ARG1;
}

// \UNDEF /////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the argument of \undef. Characters accumulate in
 * ctx->arg1_buffer; at '}' the named macro is removed from the macro list.
 * Calls DIE() when no macro with that name exists.
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG1 while the name is being read, PLAINTEXT once it is removed
 */
enum state UNDEF_VALUE(char curr_char, parser_context *ctx) {
    array_list *macro_ds = ctx->macro_ds;
    string_t *arg1_buffer = ctx->arg1_buffer; // macro name to be undefined
    string_t *macro_buffer = ctx->macro_buffer;

    if (curr_char == '}') {
        // the lookup is only meaningful once the whole name has been read
        macro *undef_this = list_retrieve(macro_ds, arg1_buffer->data);
        if (undef_this) {
            // defined macro exists --> remove from array list
            list_remove(macro_ds, undef_this);
            string_clear(arg1_buffer); // CLEANUP
            // NOTE(review): clear restored; harmless if the driver also clears it.
            string_clear(macro_buffer);
            return PLAINTEXT;
        } else {
            // failed undef
            DIE("cannot undef %s", arg1_buffer->data);
        }
    }
    string_append_char(arg1_buffer, curr_char);
    return ARG1;
}

// \IF ///////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the COND argument of \if. The condition's text is
 * not stored: an empty argument selects ELSE, a non-empty one selects THEN
 * (recorded in ctx->ARG once the balanced closing brace is reached).
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG3 while the condition is being read, ARG2 when it is complete
 */
enum state IF_COND(char curr_char, parser_context *ctx) {
    // here, even if false, we read until the end of arg!
    int balance = check_brace_balance(curr_char, false, true);

    static bool first_char = true; // true until a condition character was seen

    if (curr_char == '}' && first_char) {
        ctx->ARG = ELSE;
        first_char = true;
        return ARG2; // false, go to the {ELSE} case
    } else if (curr_char == '}' && balance == 0) {
        ctx->ARG = THEN;
        first_char = true; // RESET as we exit
        return ARG2; // true and reached the end of ARG3, go to the {THEN} case
    }

    first_char = false; // no longer looking at the first character
    return ARG3;
}


/**
 * Reads one character of the THEN argument of \if into arg2_buffer. The
 * argument is only stored here; IF_ELSE decides which branch is used.
 * Calls DIE() when the argument does not start with '{'.
 * @param curr_char character being processed
 * @param arg2_buffer buffer receiving the THEN text (outer braces excluded)
 * @param macro_buffer unused
 * @param comment_processed_buffer unused
 * @param ctx unused
 * @return ARG2 while the argument is being read, ARG1 when it is complete
 */
enum state IF_THEN(char curr_char, string_t *arg2_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx) {
    (void)macro_buffer;
    (void)comment_processed_buffer;
    (void)ctx;

    int balance = check_brace_balance(curr_char, false, false); // no add-one: the open brace is read here
    static bool first_char = true; // don't write opening brace, but allow thereafter

    if (first_char && curr_char != '{') { // EDGE CASE: missing argument
        DIE("Killed in IF_THEN - %s", "missing then statement");
    }

    if (curr_char == '}' && balance == 0) {
        check_brace_balance(curr_char, true, false);
        first_char = true; // reset
        return ARG1;
    }
    if (curr_char == '{' && first_char) {
        first_char = false;
        return ARG2; // don't write opening brace, but allow thereafter (ie nested if)
    }

    first_char = false;
    string_append_char(arg2_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the ELSE argument of \if into arg3_buffer. At the
 * balanced closing brace the branch selected in ctx->ARG (THEN text from
 * ctx->arg2_buffer or ELSE text from arg3_buffer) is pushed back onto the
 * stack. Calls DIE() when the argument does not start with '{'.
 * @param curr_char character being processed
 * @param arg3_buffer buffer receiving the ELSE text (outer braces excluded)
 * @param macro_buffer macro-name buffer, cleared when the \if is finished
 * @param comment_processed_buffer stack that receives the chosen branch
 * @param ctx parser context
 * @return ARG1 while the argument is being read, PLAINTEXT when done
 */
enum state IF_ELSE(char curr_char, string_t* arg3_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false); // no add-one: the open brace is read here
    string_t *arg2_buffer = ctx->arg2_buffer;
    static bool first_char_arg1 = true; // don't write opening brace, but allow thereafter

    if (first_char_arg1 && curr_char != '{') {
        DIE("Killed in IF_ELSE - %s", "missing else statement");
    }

    if (curr_char == '}' && balance == 0) {
        // we store both arguments but select which to output
        if (ctx->ARG == ELSE) {
            string_push_reversed(comment_processed_buffer, arg3_buffer);
        } else if (ctx->ARG == THEN) {
            string_push_reversed(comment_processed_buffer, arg2_buffer);
        }

        // cleanup
        // NOTE(review): buffer clears restored so the next \if starts from
        // empty buffers; harmless if the driver also clears them.
        string_clear(arg2_buffer);
        string_clear(arg3_buffer);
        string_clear(macro_buffer);
        ctx->ARG = NONE;
        check_brace_balance(curr_char, true, false);
        first_char_arg1 = true;
        return PLAINTEXT;
    }
    if (curr_char == '{' && first_char_arg1) {
        first_char_arg1 = false;
        return ARG1; // don't write opening brace, but allow thereafter (ie nested if)
    }

    first_char_arg1 = false;
    string_append_char(arg3_buffer, curr_char);
    return ARG1;
}
// \IFDEF ///////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the NAME argument of \ifdef into ctx->arg1_buffer.
 * At '}' the name is looked up and ctx->ARG is set to THEN (defined) or
 * ELSE (not defined). Calls DIE() for an empty name or a non-alphanumeric
 * character inside the argument.
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG3 while the name is being read, ARG2 when it is complete
 */
enum state IFDEF_COND(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, true);
    static bool first_char = true;

    // EX: /ifdef{wro}ng} but allow /ifdef{wrong} (closing brace accepted)
    if ((isalnum((unsigned char)curr_char) == 0 && balance != 0)
        || (curr_char == '}' && first_char)) {  // empty COND argument
        DIE("%c is not alphanumeric", curr_char);
    }

    if (curr_char == '}') {
        if (list_retrieve(ctx->macro_ds, ctx->arg1_buffer->data)) {
            ctx->ARG = THEN;
        } else {
            // not defined macro
            ctx->ARG = ELSE;
        }
        // Re-arm the empty-argument check for the next \ifdef; without this
        // it only ever fires for the first one.
        first_char = true;
        // NOTE(review): clear restored so the next name starts empty;
        // harmless if the driver also clears it.
        string_clear(ctx->arg1_buffer);
        return ARG2;
    }

    string_append_char(ctx->arg1_buffer, curr_char);
    first_char = false;
    return ARG3;
}

/**
 * Reads one character of the THEN argument of \ifdef into ctx->arg2_buffer.
 * The argument is only stored here; IFDEF_ELSE decides which branch is
 * used. Calls DIE() when the argument does not start with '{'.
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG2 while the argument is being read, ARG1 when it is complete
 */
enum state IFDEF_THEN(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false); // add flag off since we read in the open brace
    static bool first_char = true;

    if (first_char && curr_char != '{') {
        // EDGE CASE: missing argument
        DIE("Killed in IF_THEN - %s", "missing then statement");
    } else if (first_char && curr_char == '{') {
        // EDGE CASE: nested statements -- don't write opening brace
        first_char = false;
        return ARG2;
    }

    // read in ARG2 but handle logic in the ELSE
    if (curr_char == '}' && balance == 0) {
        check_brace_balance(curr_char, true, false);
        first_char = true;
        return ARG1;
    }

    first_char = false;
    string_append_char(ctx->arg2_buffer, curr_char);
    return ARG2;
}

/**
 * Reads one character of the ELSE argument of \ifdef into ctx->arg3_buffer.
 * At the balanced closing brace the branch selected in ctx->ARG is pushed
 * back onto the stack. Calls DIE() when the argument does not start
 * with '{'.
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG1 while the argument is being read, PLAINTEXT when done
 */
enum state IFDEF_ELSE(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false);
    static bool first_char = true;

    if (first_char && curr_char != '{') {
        DIE("Killed in IF_ELSE - %s", "missing else statement");
    } else if (first_char && curr_char == '{') {
        first_char = false;
        return ARG1;    // don't write opening brace
    }

    if (curr_char == '}' && balance == 0) {
        // store both arguments but select which to push (dep on conditional)
        if (ctx->ARG == ELSE) {
            string_push_reversed(ctx->comment_processed_buffer, ctx->arg3_buffer);
        } else if (ctx->ARG == THEN) {
            string_push_reversed(ctx->comment_processed_buffer, ctx->arg2_buffer);
        }

        // NOTE(review): buffer clears restored so the next \ifdef starts
        // from empty buffers; harmless if the driver also clears them.
        string_clear(ctx->arg2_buffer);
        string_clear(ctx->arg3_buffer);
        string_clear(ctx->macro_buffer);
        ctx->ARG = NONE;
        check_brace_balance(curr_char, true, false);
        first_char = true;
        return PLAINTEXT;
    }

    first_char = false;
    string_append_char(ctx->arg3_buffer, curr_char);
    return ARG1;
}
// \INCLUDE ///////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the PATH argument of \include into
 * ctx->arg1_buffer. At '}' the named file is read and its contents are
 * pushed back onto the stack in place of the macro. Calls DIE() when the
 * file cannot be read.
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG1 while the path is being read, PLAINTEXT when done
 */
enum state INCLUDE_PATH(char curr_char, parser_context *ctx) {
    if (curr_char == '}') {
        // read file and replace macro with contents
        string_t *content = read_file(ctx->arg1_buffer->data);
        if (!content) {
            DIE("cannot read %s", ctx->arg1_buffer->data);
        }

        // push string of file back to buffer
        string_push_reversed(ctx->comment_processed_buffer, content);

        string_destroy(content); // the pushed copy is all that is needed
        // NOTE(review): clears restored so the next macro starts from empty
        // buffers; harmless if the driver also clears them.
        string_clear(ctx->arg1_buffer);
        string_clear(ctx->macro_buffer);
        return PLAINTEXT;
    }

    string_append_char(ctx->arg1_buffer, curr_char);
    return ARG1;
}

/**
 * Reads an entire file into a newly created string.
 * @param path path of the file to read
 * @return a string holding the file's contents (the caller destroys it with
 *         string_destroy()), or NULL if the file cannot be opened, sized or
 *         read completely
 */
string_t *read_file(const char *path) {
    FILE *file = fopen(path, "r");
    if (!file) {
        return NULL;
    }

    // move cursor to end of file and store size of text
    long file_size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        file_size = ftell(file);
    }
    if (file_size < 0) {
        fclose(file);
        return NULL;
    }
    rewind(file); // go back and prepare to read

    string_t *content = string_buffer_create();
    if (!content) {
        fclose(file);
        return NULL;
    }

    // ensure string buffer is big enough
    while (content->capacity < (size_t)file_size + 1) {
        string_grow(content);
    }

    size_t bytes_read = fread(content->data, 1, (size_t)file_size, file);
    fclose(file); // the stream is no longer needed on any path below
    if (bytes_read != (size_t)file_size) {
        // discrepancy in read --> error
        string_destroy(content);
        return NULL;
    }

    // null terminate and update size
    content->data[file_size] = '\0';
    content->size = (size_t)file_size;

    return content;
}
// \EXPANDAFTER ///////////////////////////////////////////////////////////////////////////////
/**
 * Reads one character of the BEFORE argument of \expandafter into
 * ctx->expaf_before_buffer. The text is only stored; it is combined with
 * the AFTER argument in EXPANDAFTER_AFTER.
 * @param curr_char character being processed
 * @param ctx parser context
 * @return ARG2 while the argument is being read, ARG1 when it is complete
 */
enum state EXPANDAFTER_BEFORE(char curr_char, parser_context *ctx) {
    // add-one flag: the opening brace was consumed before this handler ran
    int balance = check_brace_balance(curr_char, false, true);

    if (curr_char == '}' && balance == 0) {
        // read and store only -- don't append the buffer to anything yet
        return ARG1;
    }

    string_append_char(ctx->expaf_before_buffer, curr_char);
    return ARG2;
}


// Reads one character of the AFTER argument of \expandafter into
// ctx->expaf_after_buffer. At the balanced closing brace it runs
// expansion_stack() on that buffer, concatenates BEFORE and AFTER into
// ctx->arg1_buffer, pushes the result (reversed) onto
// ctx->comment_processed_buffer, clears the scratch buffers and returns
// PLAINTEXT; otherwise it returns ARG1. DIE() is called when the argument
// does not start with '{'.
// NOTE(review): this excerpt has lost its single-token lines (closing braces
// and possibly a reverse of the AFTER buffer before expansion_stack()) -
// restore them from the original file. How the recursive expansion's output
// is routed depends on macro_process_text(), which is not fully visible here.
enum state EXPANDAFTER_AFTER(char curr_char, parser_context *ctx) {
    int balance = check_brace_balance(curr_char, false, false);
    static bool first_char = true; // true until the opening brace has been consumed

   if (first_char && curr_char != '{') {
        DIE("Killed in EXPANDAFTER_AFTER - %s", "missing after statement");
    } else if (first_char && curr_char == '{') {
        first_char = false; 
        return ARG1;    // don't write opening brace

    if (curr_char == '}' && balance == 0) {
        // reverse and push AFTER arg --> expand
        string_clear(ctx->macro_buffer); // clear for use in recursive call
        expansion_stack(ctx, ctx->expaf_after_buffer); // AFTER is now fully expanded

        // concatenate BEFORE + AFTER
        string_append_string(ctx->arg1_buffer, ctx->expaf_before_buffer); // BEFORE
        string_append_string(ctx->arg1_buffer, ctx->expaf_after_buffer); // AFTER

        // push processed /expandafter to our main stack
        string_push_reversed(ctx->comment_processed_buffer, ctx->arg1_buffer); 

        // cleanup
        string_clear(ctx->arg1_buffer); // total expandafter expansion
        string_clear(ctx->expaf_before_buffer); // before expansion
        string_clear(ctx->expaf_after_buffer); // recursed after expansion 
        balance = check_brace_balance(curr_char, true, false);
        first_char = true; // re-arm for the next \expandafter
        return PLAINTEXT;
    first_char = false;
    string_append_char(ctx->expaf_after_buffer, curr_char);
    return ARG1; 

/**
 * State machine (driver function): processes a single character in the
 * given state and reports the state to use for the next character.
 * Keeps a static flag between calls while a macro name is being read.
 * @param curr_char character currently being processed
 * @param curr_state state the parser is in for this character
 * @param ctx parser context (buffers, macro list, current macro)
 * @return the next state; ERROR for a state this function does not handle
 */
enum state macro_process_text(char curr_char, enum state curr_state, parser_context *ctx) {
    // true once at least one character of a macro name has been read in
    // ESCAPE_CHAR. Declared at function scope because a declaration may not
    // directly follow a case label before C23.
    static bool startedName = false;

    // local shortcuts (from refactor)
    string_t *plaintext_buffer = ctx->plaintext_buffer;
    string_t *macro_buffer = ctx->macro_buffer;
    array_list *macro_ds = ctx->macro_ds;
    macro *m = ctx->currentMacro;

    // processing
    switch (curr_state) {
        case PLAINTEXT:
            if (curr_char == '\\') {
                return ESCAPE_CHAR;
            }
            // store in plaintext buffer
            string_append_char(plaintext_buffer, curr_char);
            return PLAINTEXT;

        case ESCAPE_CHAR:
            /*
             * \ + whitespace + macro -> plaintext
             * \MacroName {} -> DIE
             * \NoArg -> DIE
             */
            // EX: we don't want an error if escaped comment -> \%
            if (curr_char == '%') {
                string_append_char(plaintext_buffer, '%');
                startedName = false;
                return PLAINTEXT;
            } else if (isspace((unsigned char)curr_char)) {
                // cast: <ctype.h> functions are undefined for negative char values
                if (!startedName) {
                    // white space right after the escape --> treat as plaintext
                    return PLAINTEXT;
                } else {
                    // whitespace in the middle of a macro name --> kill
                    DIE("expected {, found '%c'", curr_char);
                }
            } else if (curr_char == '{') { // start reading arguments
                if (!startedName && macro_buffer->size == 0) {
                    DIE("No macro name given before %c", curr_char); // empty macro name
                }

                // The name is complete on every route below, so reset the flag
                // here; otherwise the next escape sequence would start with a
                // stale "name already started" state.
                startedName = false;

                // Now route to the appropriate macro case
                if (strcmp(macro_buffer->data, "def") == 0) {
                    macro *new_macro = macro_create();

                    // append by reference
                    list_append(macro_ds, new_macro);
                    // backend setup, parse macro name now
                    return ARG2;
                } else if (strcmp(macro_buffer->data, "undef") == 0) {
                    return ARG1;
                } else if (strcmp(macro_buffer->data, "if") == 0) {
                    return ARG3;
                } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
                    return ARG3;
                } else if (strcmp(macro_buffer->data, "include") == 0) {
                    return ARG1;
                } else if (strcmp(macro_buffer->data, "expandafter") == 0) {
                    return ARG2;
                } else {
                    // search for user defined macro (single lookup)
                    macro *found = list_retrieve(macro_ds, macro_buffer->data);
                    if (found != NULL) {
                        ctx->currentMacro = found;
                        if (!found->has_pound) {
                            // no pound in the definition: argument gets skipped
                            ctx->skip_arg = true; // activate switch
                        }
                        return ARG1;
                    }
                    DIE("%s not defined", macro_buffer->data);
                }
            } else {
                // otherwise non-whitespace, non-brace => part of macro name
                string_append_char(macro_buffer, curr_char); // store char in macro_buffer
                startedName = true;   // started reading name
            }

            return ESCAPE_CHAR;

        case ARG3:
            if (strcmp(macro_buffer->data, "if") == 0) {
                return IF_COND(curr_char, ctx);
            } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
                return IFDEF_COND(curr_char, ctx);
            }
            DIE("Did not enter any cases in ARG3. %s", "ERROR.");
            return ERROR; // never fall through into ARG2

        case ARG2:
            // store name
            if (strcmp(macro_buffer->data, "def") == 0) {
                return DEF_NAME(macro_ds, curr_char, ctx);
            } else if (strcmp(macro_buffer->data, "if") == 0) {
                return IF_THEN(curr_char, ctx->arg2_buffer, macro_buffer, ctx->comment_processed_buffer, ctx);
            } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
                return IFDEF_THEN(curr_char, ctx);
            } else if (strcmp(macro_buffer->data, "expandafter") == 0) {
                return EXPANDAFTER_BEFORE(curr_char, ctx);
            }
            DIE("Did not enter any cases in ARG2. %s", "ERROR.");
            return ARG1;

        case ARG1:
            // store value
            if (strcmp(macro_buffer->data, "def") == 0) {
                return DEF_VALUE(curr_char, curr_state, macro_ds, macro_buffer);
            } else if (strcmp(macro_buffer->data, "undef") == 0) {
                return UNDEF_VALUE(curr_char, ctx);
            } else if (ctx->skip_arg) {
                // no pound present in \def, so skip the arguments if any
                return USERDEF_IGNORE(curr_char, ctx);
            } else if (strcmp(macro_buffer->data, "if") == 0) {
                return IF_ELSE(curr_char, ctx->arg1_buffer, macro_buffer, ctx->comment_processed_buffer, ctx);
            } else if (strcmp(macro_buffer->data, "ifdef") == 0) {
                return IFDEF_ELSE(curr_char, ctx);
            } else if (strcmp(macro_buffer->data, "include") == 0) {
                return INCLUDE_PATH(curr_char, ctx);
            } else if (strcmp(macro_buffer->data, "expandafter") == 0) {
                return EXPANDAFTER_AFTER(curr_char, ctx);
            } else {
                // default: user-defined macro with pound
                return USERDEF_VALUE(curr_char, m, ctx);
            }

        case PROCESSED:
            // nothing to do for this character; stay in PROCESSED
            return PROCESSED;

        default:
            return ERROR;
    }
}

Leave a Comment