// NOTE: the next line is paste-site metadata, not part of the program:
// Untitled | unknown | plain_text | a year ago | 43 kB | 4 | Indexable
#include "proj1.h"
#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>
// ~3 hrs 45 mins to complete String Library
// ~1 hr 45 min to introduce skeleton and initial state
// PHASE I: COMPLETE
// States of the macro-expansion state machine (see macro_process_text).
// Which argument each ARGn state stands for depends on the macro being read:
//   \def:          ARG2 = name,      ARG1 = value
//   \if, \ifdef:   ARG3 = condition, ARG2 = then-branch, ARG1 = else-branch
//   \expandafter:  ARG2 = before,    ARG1 = after
//   \undef, \include, user macros:   ARG1 = the single argument
enum state {PLAINTEXT, // ordinary text: characters are copied to the plaintext buffer
ESCAPE_CHAR, // a backslash was read; a macro name or an escaped character follows
ARG1, ARG2, ARG3, // macro arguments
UNDEF, // NOTE(review): not referenced in the visible part of this file
PROCESSED, // NOTE(review): not referenced in the visible part of this file
ERROR // NOTE(review): not referenced in the visible part of this file
};
// Branch selected by the condition of an \if / \ifdef; stored in
// parser_context.ARG by IF_COND / IFDEF_COND and consumed by IF_ELSE / IFDEF_ELSE.
enum arg {THEN, // condition held: the then-branch (arg2 buffer) is expanded
ELSE, // condition failed: the else-branch (arg3 buffer) is expanded
NONE // default state: no conditional is being processed
};
// Growable character buffer used by the string library below.
typedef struct {
char *data; // heap buffer; the library functions keep it NUL-terminated at data[size]
size_t size; // number of characters stored, excluding the terminator
size_t capacity; // number of bytes allocated for data
} string_t;
// One user-defined macro created by \def.
typedef struct {
string_t *name; // macro name (filled in by DEF_NAME)
string_t *value; // macro body (filled in character by character by DEF_VALUE)
bool has_pound; // true when the body contains an unescaped '#', i.e. needs argument substitution
} macro;
// Growable array of macro pointers; the list owns the macros it stores
// (list_destroy frees each macro together with its name and value).
typedef struct {
macro **data; // backing array of macro pointers
size_t size; // number of macros currently stored
size_t capacity; // number of slots allocated in data
} array_list;
// All mutable parser state shared by the state-machine handlers.
typedef struct {
string_t *comment_processed_buffer; // input with comments stripped; main reverses it and uses it as the expansion stack
string_t *plaintext_buffer; // final output; printed by main
string_t *macro_buffer; // name of the macro currently being read
string_t *arg1_buffer; // single-argument macros (\undef, \include, user macros); also reused as scratch by \expandafter
string_t *arg2_buffer; // \def name while it is being read; then-branch of \if / \ifdef
string_t *arg3_buffer; // else-branch of \if / \ifdef
string_t *expaf_before_buffer; // BEFORE argument of \expandafter; kept separate because the arg buffers are reused during the nested expansion
string_t *expaf_after_buffer; // AFTER argument of \expandafter (same reason as above)
array_list *macro_ds; // all currently defined user macros
macro *currentMacro; // user macro most recently looked up when its opening brace was read
bool skip_arg; // set when a user macro without '#' is invoked: its argument is read and discarded (see USERDEF_IGNORE)
enum arg ARG; // branch chosen by the current \if / \ifdef condition
} parser_context;
//---------------------------------------------------------
// data structures and prototypes
//---------------------------------------------------------
// forward declare for opaque struct
// typedef struct string_t string_t;
/**
* Creates a string_t holding a copy of a C string, populating the data,
* size and capacity fields.
*
* @param str NUL-terminated input to copy
* @return the new string, or NULL if str is NULL or allocation fails
*/
string_t *string_create_from_string(const char *str);
/**
* Creates an empty, NUL-terminated buffer with a preset starting capacity
* so that later appends can grow it.
*
* @return the new buffer, or NULL if allocation fails
*/
string_t *string_buffer_create();
/**
* Destroys a string_t object (its character buffer and the struct itself).
*
* @param str string to free; NULL is ignored
*/
void string_destroy(string_t *str);
/**
* Concatenates a character to the end of a given string and keeps the
* string NUL-terminated, growing the buffer when needed.
*
* @param str pointer to a string object
* @param c character to add
*/
void string_append_char(string_t *str, const char c);
/**
* Concatenates a given str (added_str) to the first string
* given in the args.
*
* @param base_str ptr to base string
* @param added_str ptr to string to be added
*/
void string_append_string(string_t *base_str, const string_t *added_str);
/**
* Enlarges the buffer of str using the DOUBLE macro from proj1.h;
* terminates the program if the reallocation fails.
*/
void string_grow(string_t* str);
/**
* Removes the last character of str; terminates the program if str is empty.
* The parameter c is not used by the implementation.
*/
void string_pop_char(string_t *str, const char c);
/**
* Reverses the characters of str in place.
*/
void string_reverse(string_t *str);
/**
* Called when exiting a macro expansion. 'Resets' the buffer by
* setting its size to 0 and writing a NUL terminator at index 0, so
* later appends overwrite the previous data and strcmp() on the
* buffer sees an empty string.
*/
void string_clear(string_t *macro_buffer);
/**
* Replaces a trailing '\n' in str (if any) with a NUL terminator.
*/
void string_remove_newline(string_t *str);
/**
* Pushes the reversed contents of `src` onto the end of `dest`.
* i.e. if src = "Hello", we append "olleH" to `dest`.
*/
void string_push_reversed(string_t *dest, const string_t *src);
/**
* State machine: handles one character in state curr_state and returns
* the state to use for the next character.
*/
enum state macro_process_text(char curr_char, enum state curr_state, parser_context *ctx);
//---------------------------------------------------------
// macro data structure
//---------------------------------------------------------
macro *macro_create(void);

/**
 * Allocates a macro whose name and value are empty string buffers and whose
 * has_pound flag is cleared.
 *
 * @return the new macro; the program is terminated via DIE if any
 *         allocation fails, so the result is never NULL
 */
macro *macro_create(void) {
    macro *fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        DIE("Failed malloc. %s", "END");
    }

    fresh->has_pound = false;
    fresh->name = string_buffer_create();
    fresh->value = string_buffer_create();

    // both buffers are required; a macro with a missing field is unusable
    if (fresh->name == NULL || fresh->value == NULL) {
        DIE("Failed to create string buffers for macro fields. %s", "END");
    }
    return fresh;
}
//---------------------------------------------------------
// array list prototypes
//---------------------------------------------------------
// Allocates an empty macro list; terminates the program on allocation failure.
array_list *list_create(void);
// Frees every stored macro (name, value and struct), the backing array and the list.
void list_destroy(array_list *list);
// Stores new_macro at the end of the list, growing the array when it is full.
void list_append(array_list *list, macro *new_macro);
// Enlarges the backing array using the DOUBLE macro from proj1.h.
void list_grow(array_list *list);
// Removes the entry whose name equals new_macro's name; the last entry takes
// its slot, so order is not preserved. Terminates the program if no entry matches.
void list_remove(array_list *list, macro *new_macro);
// Linear search by name; returns the matching macro or NULL.
macro *list_retrieve(array_list *list, const char *macro_name);
// States of the comment-stripping pass (process_comments).
enum comment_state {READING, // copying characters; an unescaped '%' starts a comment
COMMENT, // inside a comment: discard everything up to the newline
BLANK_SKIP // after the comment's newline: discard whitespace until the next visible character
};
void process_comments(FILE *infile, string_t *buffer);
//---------------------------------------------------------
// context prototypes
//---------------------------------------------------------
parser_context *context_create(void);
parser_context *context_create(void) {
parser_context *context = malloc(sizeof(parser_context));
if (!context) {
DIE("Failed Malloc. %s", "ERROR");
}
// init buffers and list
context->comment_processed_buffer = string_buffer_create();
context->plaintext_buffer = string_buffer_create();
context->macro_buffer = string_buffer_create();
context->arg1_buffer = string_buffer_create();
context->arg2_buffer = string_buffer_create();
context->arg3_buffer = string_buffer_create();
context->expaf_before_buffer = string_buffer_create();
context->expaf_after_buffer = string_buffer_create();
context->macro_ds = list_create();
context->skip_arg = false;
context->ARG = NONE;
// error check
if (!context->comment_processed_buffer || !context->plaintext_buffer ||
!context->macro_buffer || !context->arg1_buffer ||
!context->arg2_buffer || !context->arg3_buffer ||
!context->expaf_before_buffer || !context->expaf_after_buffer
|| !context->macro_ds) {
DIE("Failed to initialize buffers or list. %s", "ERROR");
}
return context;
}
void context_destroy(parser_context *context);

/**
 * Releases a parser context together with every buffer it holds and its
 * macro list. A NULL context, or NULL members, are skipped.
 *
 * @param context context to free (may be NULL)
 */
void context_destroy(parser_context *context) {
    if (context == NULL) {
        return;
    }

    // every string buffer held by the context, in declaration order
    string_t *owned[] = {
        context->comment_processed_buffer,
        context->plaintext_buffer,
        context->macro_buffer,
        context->arg1_buffer,
        context->arg2_buffer,
        context->arg3_buffer,
        context->expaf_before_buffer,
        context->expaf_after_buffer,
    };
    for (size_t k = 0; k < sizeof owned / sizeof owned[0]; k++) {
        if (owned[k] != NULL) {
            string_destroy(owned[k]);
        }
    }

    // list_destroy terminates the program on NULL, so guard it here
    if (context->macro_ds != NULL) {
        list_destroy(context->macro_ds);
    }
    free(context);
}
// Tracks runs of backslashes across calls; returns true when curr_char is NOT escaped.
bool special_char_escaped(char curr_char, bool reset);
// Tracks brace nesting across calls and returns the balance after curr_char.
int check_brace_balance(char curr_char, bool reset, bool add_one_openbrace);
// \def: reads the macro name into the most recently appended macro.
enum state DEF_NAME(array_list *macro_ds, char curr_char, parser_context *ctx);
// \def: reads the macro value into the most recently appended macro.
enum state DEF_VALUE(char curr_char, enum state curr_state, array_list *macro_ds, string_t *macro_buffer);
// \undef: reads the macro name and removes that macro from the list.
enum state UNDEF_VALUE(char curr_char, parser_context *ctx);
// Pushes the value of m, with each '#' replaced by arg, back onto the expansion stack.
void macro_substitute_arg(const macro *m, const string_t *arg, parser_context *ctx);
// User macro with '#': reads the argument, then substitutes it into the value.
enum state USERDEF_VALUE(char curr_char, const macro *m, parser_context *ctx);
enum state USERDEF_IGNORE(char curr_char, parser_context *ctx); // EX: /jake{} => no pound so ignore everything after the '{' up until '}'
// \if: reads the then-branch into arg2_buffer.
enum state IF_THEN(char curr_char, string_t *arg2_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx);
// \if: reads the else-branch into arg3_buffer, then pushes the selected branch.
enum state IF_ELSE(char curr_char, string_t* arg3_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx);
// \if: reads the condition; an empty condition selects ELSE, anything else THEN.
enum state IF_COND(char curr_char, parser_context *ctx);
// \ifdef: reads a macro name; selects THEN when it is defined, ELSE otherwise.
enum state IFDEF_COND(char curr_char, parser_context *ctx);
// \ifdef: reads the then-branch into ctx->arg2_buffer.
enum state IFDEF_THEN(char curr_char, parser_context *ctx);
// \ifdef: reads the else-branch into ctx->arg3_buffer, then pushes the selected branch.
enum state IFDEF_ELSE(char curr_char, parser_context *ctx);
// Reads a whole file into a new string; returns NULL on any failure.
string_t *read_file(const char *path);
// \include: reads the path, then pushes the file contents onto the expansion stack.
enum state INCLUDE_PATH(char curr_char, parser_context *ctx);
// Pops characters off source_buffer (a reversed string) and feeds them to the state machine.
void expansion_stack(parser_context *ctx, string_t *source_buffer);
// \expandafter: reads the BEFORE argument into ctx->expaf_before_buffer.
enum state EXPANDAFTER_BEFORE(char curr_char, parser_context *ctx);
// \expandafter: reads the AFTER argument, expands it, then pushes BEFORE followed by AFTER.
enum state EXPANDAFTER_AFTER(char curr_char, parser_context *ctx);
//---------------------------------------------------------
// main
//---------------------------------------------------------
// PHASE II: Think of what states you'll need to have + implement basic character scanning
// static void print_all_macros(const array_list *list) {
// if (!list) {
// printf("(macro_ds is NULL)\n");
// return;
// }
// printf("\n=== Macro Database Contents ===\n");
// for (size_t i = 0; i < list->size; i++) {
// macro *m = &list->data[i];
// printf("Macro %zu: name=\"%s\", value=\"%s\"\n",
// i,
// m->name->data ? m->name->data : "(null)",
// m->value->data ? m->value->data : "(null)");
// }
// printf("=== End of Macro Database ===\n\n");
// }
/**
 * Entry point. Strips comments from stdin (no arguments) or from every file
 * named on the command line (appended in order into one buffer), expands
 * macros over the result and prints the expanded text to stdout.
 */
int main(int argc, char *argv[]) {
    parser_context *ctx = context_create();
    string_t *stripped = ctx->comment_processed_buffer;

    if (argc != 1) {
        // comments are removed file by file; the cleaned text is concatenated
        for (size_t arg_idx = 1; arg_idx < (size_t)argc; arg_idx++) {
            const char *path = argv[arg_idx];
            FILE *infile = fopen(path, "r");
            if (infile == NULL) {
                DIE("%s is not a valid file input. Please try again.", path);
            }
            process_comments(infile, stripped);
            fclose(infile);
        }
    } else {
        process_comments(stdin, stripped);
    }

    // the input is reversed so that its end acts as the top of a stack
    string_reverse(stripped);
    expansion_stack(ctx, stripped);

    printf("%s", ctx->plaintext_buffer->data);

    context_destroy(ctx);
    return 0;
}
//---------------------------------------------------------
// expansion logic
//---------------------------------------------------------
/**
 * Drives the state machine over source_buffer, treating it as a stack whose
 * top is the last character. Each character is popped (by shrinking size)
 * before it is handed to macro_process_text, so any text a handler pushes
 * onto this buffer is processed next. Runs until the buffer is empty.
 *
 * @param ctx           parser context passed through to the state machine
 * @param source_buffer reversed input text; empty when the function returns
 */
void expansion_stack(parser_context *ctx, string_t *source_buffer) {
    enum state st = PLAINTEXT;
    while (source_buffer->size > 0) {
        source_buffer->size -= 1;                                  // pop
        const char top = source_buffer->data[source_buffer->size]; // popped character
        st = macro_process_text(top, st, ctx);                     // may push more text
    }
}
//---------------------------------------------------------
// string library
//---------------------------------------------------------
/**
 * Builds a string_t that holds a private copy of a C string. The capacity is
 * exactly the length plus one byte for the terminator.
 *
 * @param string NUL-terminated text to copy
 * @return the new string, or NULL when string is NULL or allocation fails
 */
string_t *string_create_from_string(const char *string) {
    if (!string) {
        return NULL;
    }

    string_t *result = malloc(sizeof *result);
    if (!result) {
        return NULL;
    }

    const size_t len = strlen(string);
    char *copy = malloc(len + 1); // +1 for the NUL terminator
    if (!copy) {
        free(result);
        return NULL;
    }
    memcpy(copy, string, len + 1); // copies the terminator too

    result->data = copy;
    result->size = len;
    result->capacity = len + 1;
    return result;
}
/**
 * Creates an empty string buffer with 250 bytes of capacity, chosen large so
 * that short strings never need to grow.
 *
 * @return the new buffer (size 0, NUL-terminated), or NULL on allocation failure
 */
string_t *string_buffer_create(void) {
    const size_t starting_capacity = 250;

    string_t *buf = malloc(sizeof *buf);
    if (buf == NULL) {
        return NULL;
    }

    char *storage = malloc(starting_capacity);
    if (storage == NULL) {
        free(buf);
        return NULL;
    }
    storage[0] = '\0'; // empty string

    buf->data = storage;
    buf->size = 0;
    buf->capacity = starting_capacity;
    return buf;
}
/**
 * Frees a string and its character buffer.
 *
 * @param str string to free; NULL is ignored
 */
void string_destroy(string_t *str) {
    if (str == NULL) {
        return;
    }
    free(str->data);
    free(str);
}
/**
 * Enlarges the buffer of str with the DOUBLE macro from proj1.h (which is
 * handed str->capacity and is expected to update it). Terminates the program
 * if the reallocation fails.
 *
 * @param str string whose buffer should grow
 */
void string_grow(string_t* str) {
    // keep the result in a temporary: on failure str->data must not be
    // overwritten with NULL, and NULL must never be passed to a %s format
    char *grown = DOUBLE(str->data, str->capacity);
    if (!grown) {
        DIE("Double failed in string_grow(): requested capacity %zu", str->capacity);
    }
    str->data = grown;
}
/**
 * Appends one character to str and re-terminates it.
 *
 * @param str string to extend
 * @param c   character to append
 */
void string_append_char(string_t *str, const char c) {
    // room is needed for the new character plus the NUL terminator
    if (str->size + 2 > str->capacity) {
        string_grow(str);
    }

    char *tail = str->data + str->size;
    tail[0] = c;
    tail[1] = '\0';
    str->size += 1;
}
/**
 * Appends the contents of added_str to base_str and re-terminates base_str.
 *
 * @param base_str string that is extended
 * @param added_str string whose characters are copied (not modified)
 */
void string_append_string(string_t *base_str, const string_t *added_str) {
    const size_t add_len = added_str->size;
    const size_t needed = base_str->size + add_len + 1; // +1 for the NUL terminator

    // One doubling may not be enough for a long added_str, so loop. The test
    // must read the live capacity: a cached copy never changes and the loop
    // would spin forever.
    while (base_str->capacity < needed) {
        string_grow(base_str);
    }

    memcpy(base_str->data + base_str->size, added_str->data, add_len);
    base_str->size += add_len;
    base_str->data[base_str->size] = '\0';
}
/**
 * Drops the last character of str. The buffer is never shrunk.
 *
 * @param str string to shorten; the program is terminated if it is empty
 * @param c   unused
 */
void string_pop_char(string_t *str, const char c) {
    if (str->size == 0) {
        DIE("Trying to pop an empty string!: %zu", str->size);
    }

    // the new size is also the index of the character being removed
    const size_t last = str->size - 1;
    str->data[last] = '\0';
    str->size = last;
}
/**
 * Reverses the characters of str in place. Strings of length 0 or 1 are left
 * untouched.
 *
 * @param str string to reverse
 */
void string_reverse(string_t *str) {
    // size is unsigned: without this guard `size - 1` wraps around for an
    // empty string and the swap loop runs far out of bounds
    if (str->size < 2) {
        return;
    }

    size_t start = 0;
    size_t end = str->size - 1;
    while (start < end) {
        char temp = str->data[start];
        str->data[start] = str->data[end];
        str->data[end] = temp;
        start++;
        end--;
    }
}
/**
 * Empties a string without releasing its buffer: size becomes 0 and the first
 * byte becomes the terminator, so later appends overwrite the old contents.
 *
 * @param str string to reset; NULL or a string without a buffer is ignored
 */
void string_clear(string_t *str) {
    if (str != NULL && str->data != NULL) {
        str->data[0] = '\0';
        str->size = 0;
    }
}
/**
 * Removes a single trailing '\n' from str, if present.
 *
 * @param str string to trim; the program is terminated if it is NULL
 */
void string_remove_newline(string_t *str) {
    if (str == NULL) {
        DIE("NULL buffer. %s", "ERROR");
    }
    size_t len = strlen(str->data);
    if (len > 0 && str->data[len - 1] == '\n') {
        str->data[len - 1] = '\0'; // replace the newline with a null terminator
        // keep the recorded size in step with the text; otherwise later
        // appends would write after the stale position and leave a stray NUL
        if (str->size == len) {
            str->size = len - 1;
        }
    }
}
/**
 * Appends the characters of src to dest in reverse order, e.g. src "Hello"
 * appends "olleH". Used to put text back on the reversed expansion stack.
 *
 * @param dest string that receives the characters
 * @param src  string that is read back to front (not modified)
 */
void string_push_reversed(string_t *dest, const string_t *src) {
    size_t remaining = src->size;
    while (remaining > 0) {
        remaining--;
        string_append_char(dest, src->data[remaining]);
    }
}
//---------------------------------------------------------
// array list library
//---------------------------------------------------------
/**
 * Creates an empty macro list with room for 10 entries.
 *
 * @return the new list; the program is terminated via DIE on allocation
 *         failure, so the result is never NULL
 */
array_list *list_create(void) {
    const size_t starting_slots = 10;

    array_list *fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        DIE("Failed malloc. %s", "END");
    }

    fresh->data = malloc(starting_slots * sizeof(macro *));
    if (fresh->data == NULL) {
        free(fresh);
        DIE("Failed malloc. %s", "END");
    }

    fresh->capacity = starting_slots;
    fresh->size = 0;
    return fresh;
}
/**
 * Frees a macro list and everything it holds: each macro's name and value,
 * the macro itself, the backing array and the list struct.
 *
 * @param list list to free; the program is terminated if it is NULL
 */
void list_destroy(array_list *list) {
    if (list == NULL) {
        DIE("NULL list in list_destroy. %s", "END");
    }

    size_t idx = 0;
    while (idx < list->size) {
        macro *entry = list->data[idx++];
        if (entry == NULL) {
            continue;
        }
        string_destroy(entry->name);
        string_destroy(entry->value);
        free(entry);
    }

    free(list->data);
    free(list);
}
/**
 * Enlarges the backing array of list with the DOUBLE macro from proj1.h
 * (which is handed list->capacity and is expected to update it). Terminates
 * the program if the reallocation fails.
 *
 * @param list list whose array should grow
 */
void list_grow(array_list *list) {
    // keep the result in a temporary so list->data is only replaced on success
    macro **grown = DOUBLE(list->data, list->capacity);
    if (!grown) {
        // message previously named string_put_char(), which is misleading
        DIE("Double failed in list_grow(): %s", "ERROR");
    }
    list->data = grown;
}
/**
 * Stores new_macro (by pointer) at the end of list, growing the array first
 * when it is full. The list takes ownership of the macro.
 *
 * @param list      list to extend; a NULL list is reported and ignored
 * @param new_macro macro to store
 */
void list_append(array_list *list, macro *new_macro) {
    if (!list) {
        WARN("Invalid list passed in. %s", "FAILED");
        return; // nothing to append to; continuing would dereference NULL
    }
    // check capacity
    if (list->size == list->capacity) {
        list_grow(list);
    }
    list->data[list->size] = new_macro;
    list->size++;
}
/**
 * Removes the macro whose name equals remove_macro's name and releases it
 * completely (name, value and the macro struct). The last entry is moved
 * into the vacated slot, so list order is not preserved. Terminates the
 * program if no entry matches.
 *
 * The stored macro is freed: if remove_macro is that same object (as when it
 * came from list_retrieve), it must not be used after this call.
 *
 * @param list         list to remove from
 * @param remove_macro macro carrying the name to remove
 */
void list_remove(array_list *list, macro *remove_macro) {
    for (size_t i = 0; i < list->size; i++) {
        macro *victim = list->data[i];
        if (strcmp(victim->name->data, remove_macro->name->data) != 0) {
            continue;
        }

        // avoid costly shifting by replacing deleted w/ last macro
        list->data[i] = list->data[list->size - 1];
        list->size--;

        // string_destroy frees the character buffer as well as the struct;
        // a plain free() of the string_t leaked the buffer
        string_destroy(victim->name);
        string_destroy(victim->value);
        free(victim);
        return;
    }
    // macro was not found
    DIE("MACRO NOT FOUND. %s", "ERROR");
}
/**
 * Looks up a macro by name with a linear scan.
 *
 * @param list       list to search
 * @param macro_name NUL-terminated name to match exactly
 * @return the first macro whose name equals macro_name, or NULL if none does
 */
macro *list_retrieve(array_list *list, const char *macro_name) {
    macro *found = NULL;
    for (size_t idx = 0; idx < list->size && found == NULL; idx++) {
        macro *candidate = list->data[idx];
        if (strcmp(candidate->name->data, macro_name) == 0) {
            found = candidate;
        }
    }
    return found;
}
//---------------------------------------------------------
// comment processing state machine
//---------------------------------------------------------
/**
 * Copies infile into buffer with comments removed.
 *
 * An unescaped '%' starts a comment. The comment runs to the end of the line,
 * and the whitespace that follows (including further newlines) is discarded
 * as well, up to the next visible character; if that character is '%' another
 * comment starts. A '%' preceded by an odd number of backslashes is copied
 * like any other character.
 *
 * @param infile stream to read until EOF
 * @param buffer string that receives the comment-free text (appended)
 */
void process_comments(FILE *infile, string_t *buffer) {
    enum comment_state state = READING;
    // fgetc returns int: storing it in a char confuses byte 0xFF with EOF
    // (signed char) or never sees EOF at all (unsigned char)
    int ch;
    while ((ch = fgetc(infile)) != EOF) {
        const char c = (char)ch;
        // fed every character in every state so the backslash count stays in
        // step; true means c is NOT escaped
        const bool unescaped = special_char_escaped(c, false);

        switch (state) {
        case READING:
            if (c == '\n') {
                // reset escape parity
                special_char_escaped('\0', true);
            }
            if (c == '%' && unescaped) {
                state = COMMENT;
            } else {
                string_append_char(buffer, c);
            }
            break;
        case COMMENT:
            if (c == '\n') {
                // the comment ends here; skip leading whitespace of what follows
                state = BLANK_SKIP;
                special_char_escaped('\0', true);
            }
            break;
        case BLANK_SKIP:
            if (c == '\n') {
                // newlines directly after a comment are swallowed
            } else if (c == '%') {
                state = COMMENT;
            } else if (!isspace((unsigned char)c)) {
                // found a visible character, resume copying
                state = READING;
                string_append_char(buffer, c);
            }
            break;
        }
    }
}
//---------------------------------------------------------
// state machine
//---------------------------------------------------------
/**
 * Keeps a running count of brace nesting across calls (function-static
 * counter) and returns the count after this call.
 *
 * @param curr_char         '{' adds one, '}' subtracts one, anything else
 *                          leaves the count unchanged
 * @param reset             when true the count is set to 0 and curr_char is
 *                          ignored; used when leaving a state
 * @param add_one_openbrace when true and the count is currently 0, the count
 *                          is first set to 1. Callers use this when the
 *                          opening brace of the argument was already consumed
 *                          in ESCAPE_CHAR rather than by the calling state.
 * @return the brace balance after applying the call
 */
int check_brace_balance(char curr_char, bool reset, bool add_one_openbrace) {
    static int depth = 0;

    // account for an opening brace that was consumed by another state
    if (depth == 0 && add_one_openbrace) {
        depth = 1;
    }

    if (reset) {
        depth = 0;
        return depth;
    }

    switch (curr_char) {
    case '{':
        depth += 1;
        break;
    case '}':
        depth -= 1;
        break;
    default:
        break;
    }
    return depth;
}
/**
 * Tracks the length of the current run of backslashes across calls
 * (function-static counter) and reports whether curr_char is unescaped.
 *
 * @param curr_char character being examined
 * @param reset     when true the run length is cleared and true is returned
 *                  without looking at curr_char
 * @return for a backslash: true when the run, including this one, has even
 *         length; for any other character: true when the run before it had
 *         even length (the character is NOT escaped). A non-backslash
 *         character also ends the run.
 */
bool special_char_escaped(char curr_char, bool reset) {
    static int backslash_run = 0;

    if (reset) {
        backslash_run = 0;
        return true;
    }

    if (curr_char != '\\') {
        const bool even_run = (backslash_run % 2) == 0;
        backslash_run = 0; // any other character ends the run
        return even_run;
    }

    backslash_run += 1;
    return (backslash_run % 2) == 0;
}
// \DEF /////////////////////////////////////////////////////////////////////////////
/**
 * \def, name argument: collects the macro name one character at a time in
 * ctx->arg2_buffer. On the closing brace the name is stored in the most
 * recently appended macro of macro_ds (the caller must have appended it).
 *
 * Terminates the program when the name is already defined or when a
 * character is not alphanumeric.
 *
 * @param macro_ds  macro list; its last entry receives the name
 * @param curr_char character being processed
 * @param ctx       parser context (arg2_buffer is the scratch buffer)
 * @return ARG2 while the name is being read, ARG1 once it is complete
 */
enum state DEF_NAME(array_list *macro_ds, char curr_char, parser_context *ctx) {
    // the macro being defined is the one appended last
    macro *m = macro_ds->data[macro_ds->size - 1];
    string_t *name_buffer = ctx->arg2_buffer;

    if (curr_char == '}') {
        // EDGE CASE: check if the user is trying to redefine a macro before undefining
        if (list_retrieve(macro_ds, name_buffer->data)) {
            DIE("cannot redefine %s", name_buffer->data);
        }
        string_append_string(m->name, name_buffer);
        string_clear(name_buffer);
        return ARG1; // read the value next
    }

    // <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour
    if (isalnum((unsigned char)curr_char) == 0) {
        DIE("%c is not alphanumeric", curr_char);
    }
    string_append_char(name_buffer, curr_char);
    return ARG2;
}
/**
 * \def, value argument: stores the macro body, one character per call, in the
 * value of the most recently appended macro of macro_ds.
 *
 * The first character must be '{' (otherwise the program is terminated); it
 * is not stored. The matching closing brace ends the value, is not stored
 * either, and resets the brace and escape trackers and clears macro_buffer.
 *
 * Brace counting: every brace is counted twice. The opening brace is counted
 * by the call at the top and again in the final if-block; every later brace
 * is counted by the call at the top and again inside `if (start)`. Because
 * opening and closing braces are doubled alike, the balance still reaches 0
 * exactly at the matching closing brace.
 *
 * The curr_state parameter is not used.
 *
 * @return ARG1 while the value is being read, PLAINTEXT once it is complete
 */
enum state DEF_VALUE(char curr_char, enum state curr_state, array_list *macro_ds, string_t *macro_buffer) {
static bool start = false; // false until the opening brace of the value has been consumed
bool unescaped = special_char_escaped(curr_char, false); ; // true if no escape char, false if there is
int balance = check_brace_balance(curr_char, false, false); // first count of this character
if (start) {
// second count for braces (see the note on brace counting above)
if (curr_char == '{' || curr_char == '}') {
balance = check_brace_balance(curr_char, false, false);
}
// an escaped '#' is not stored (the backslash before it was already stored by the previous call)
if (curr_char == '#' && !unescaped) {
return ARG1; // odd parity -> escaped pound so DO NOT append
}
// exit condition: balanced closing brace
if (curr_char == '}' && balance == 0) {
// output newline
// string_append_char(plaintext_buffer, '\n');
// don't store closing brace
balance = check_brace_balance(curr_char, true, false); // reset
special_char_escaped(curr_char, true);
string_clear(macro_buffer); // reset macro_buffer for next iteration
start = false; // the next \def starts by expecting '{' again
return PLAINTEXT;
}
// append value to the macro appended last
size_t insert = macro_ds->size - 1;
macro *m = macro_ds->data[insert];
string_t *this_macro_value = m->value;
// an unescaped '#' marks the macro as taking an argument
if (curr_char == '#' && unescaped) {
m->has_pound = true;
}
string_append_char(this_macro_value, curr_char);
return ARG1;
}
// only reached for the very first character of the value (start is still false)
if (curr_char == '{' && !start) {
// skip the opening brace of value arg
start = true;
check_brace_balance(curr_char, false, false); // second count of the opening brace
} else {
// if first char in DEF_VAL is not an opening brace, then semantic error
DIE("expected {, found %c", curr_char); // Ex: space between macro args
}
return ARG1;
}
/**
 * Expands a user macro: pushes the value of m onto the expansion stack
 * (ctx->comment_processed_buffer) with every '#' replaced by arg. Since the
 * stack is stored reversed, the value is walked back to front and the
 * argument is pushed reversed as well.
 *
 * @param m   macro whose value is expanded
 * @param arg argument text substituted for each '#'
 * @param ctx parser context that owns the expansion stack
 */
void macro_substitute_arg(const macro *m, const string_t *arg, parser_context *ctx) {
    string_t *stack = ctx->comment_processed_buffer;
    const string_t *body = m->value;

    size_t pos = body->size;
    while (pos > 0) {
        pos--;
        const char ch = body->data[pos];
        if (ch != '#') {
            string_append_char(stack, ch); // normal char -- push as usual
            continue;
        }
        // placeholder: push the argument, last character first
        string_push_reversed(stack, arg);
    }
}
// USER DEFINED MACROS /////////////////////////////////////////////////////////////////////////////
/**
 * User macro with a '#' placeholder: collects the argument in
 * ctx->arg1_buffer. On the closing brace that matches the (already consumed)
 * opening brace, the macro value with the argument substituted is pushed back
 * onto the expansion stack and the buffers are cleared.
 *
 * Nested braces are kept as part of the argument. Previously every '{' was
 * dropped and the first '}' ended the argument, so an argument such as
 * `a{b}c` was cut to `ab` and `c}` leaked into the output.
 *
 * @param curr_char character being processed
 * @param m         macro being invoked
 * @param ctx       parser context
 * @return ARG1 while the argument is being read, PLAINTEXT once it is complete
 */
enum state USERDEF_VALUE(char curr_char, const macro *m, parser_context *ctx) {
    string_t *arg1_buffer = ctx->arg1_buffer;
    string_t *macro_buffer = ctx->macro_buffer;

    // the opening brace was read in ESCAPE_CHAR, hence add_one_openbrace
    int balance = check_brace_balance(curr_char, false, true);

    if (curr_char == '}' && balance == 0) {
        // end of argument -> place expansion back in stack
        macro_substitute_arg(m, arg1_buffer, ctx);
        string_clear(macro_buffer);
        string_clear(arg1_buffer);
        check_brace_balance(curr_char, true, true); // leave the tracker clean for the next state
        return PLAINTEXT;
    }

    // accumulate chars (including nested braces) into the argument buffer
    string_append_char(arg1_buffer, curr_char);
    return ARG1;
}
/**
 * User macro without a '#' placeholder: the argument is read and discarded.
 * On the closing brace that matches the (already consumed) opening brace the
 * macro named in ctx->macro_buffer is looked up, its value is pushed onto the
 * expansion stack, and the brace/escape trackers and skip_arg are reset.
 *
 * Terminates the program if the macro can no longer be found.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG1 while the argument is being skipped, PLAINTEXT afterwards
 */
enum state USERDEF_IGNORE(char curr_char, parser_context *ctx) {
    // the opening brace was read in ESCAPE_CHAR, hence add_one_openbrace
    int balance = check_brace_balance(curr_char, false, true);
    if (curr_char != '}' || balance != 0) {
        return ARG1; // still inside the ignored argument
    }

    string_t *macro_buffer = ctx->macro_buffer;
    macro *m = list_retrieve(ctx->macro_ds, macro_buffer->data);
    if (m == NULL) {
        // fail cleanly instead of dereferencing NULL below
        DIE("%s not defined", macro_buffer->data);
    }
    ctx->currentMacro = m;

    string_clear(macro_buffer);
    string_push_reversed(ctx->comment_processed_buffer, m->value);

    // reset shared trackers before returning to plain text
    check_brace_balance(curr_char, true, true);
    special_char_escaped(curr_char, true);
    ctx->skip_arg = false;
    return PLAINTEXT;
}
// \UNDEF /////////////////////////////////////////////////////////////////////////////
/**
 * \undef: collects the macro name in ctx->arg1_buffer. On the closing brace
 * the named macro is removed from the macro list and the buffers are cleared.
 *
 * Terminates the program if no macro with that name is defined.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG1 while the name is being read, PLAINTEXT once the macro is removed
 */
enum state UNDEF_VALUE(char curr_char, parser_context *ctx) {
    string_t *arg1_buffer = ctx->arg1_buffer; // macro name to be undefined

    if (curr_char != '}') {
        string_append_char(arg1_buffer, curr_char);
        return ARG1;
    }

    // look the macro up only once the name is complete; doing it for every
    // character scanned the whole list needlessly
    array_list *macro_ds = ctx->macro_ds;
    macro *undef_this = list_retrieve(macro_ds, arg1_buffer->data);
    if (!undef_this) {
        // failed undef
        DIE("cannot undef %s", arg1_buffer->data);
    }

    // defined macro exists --> remove from array list
    list_remove(macro_ds, undef_this);
    string_clear(arg1_buffer);
    string_clear(ctx->macro_buffer);
    return PLAINTEXT;
}
// \IF ///////////////////////////////////////////////////////////////////////////////
/**
 * \if, condition argument: reads the condition up to its matching closing
 * brace (the opening brace was already consumed) and records the outcome in
 * ctx->ARG. An empty condition selects ELSE; any non-empty condition selects
 * THEN. The condition text itself is not stored.
 *
 * @param curr_char character being processed
 * @param ctx       parser context; ctx->ARG receives THEN or ELSE
 * @return ARG3 while the condition is being read, ARG2 once it is complete
 */
enum state IF_COND(char curr_char, parser_context *ctx) {
    static bool at_start = true; // true until a condition character has been seen

    const int depth = check_brace_balance(curr_char, false, true);
    const bool closes_cond = (curr_char == '}') && (at_start || depth == 0);

    if (!closes_cond) {
        at_start = false;
        return ARG3;
    }

    // closing brace as the very first character means an empty condition
    ctx->ARG = at_start ? ELSE : THEN;
    at_start = true; // ready for the next \if
    return ARG2;
}
/**
 * \if, then-branch: stores the branch text in arg2_buffer without its outer
 * braces. Nothing is selected here; IF_ELSE decides which branch is used.
 * The macro_buffer, comment_processed_buffer and ctx parameters are not used.
 *
 * Terminates the program if the branch does not start with '{'.
 *
 * @param curr_char   character being processed
 * @param arg2_buffer receives the then-branch text
 * @return ARG2 while the branch is being read, ARG1 once it is complete
 */
enum state IF_THEN(char curr_char, string_t *arg2_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx) {
    static bool awaiting_open = true; // true until the outer '{' has been consumed

    // this state reads its own opening brace, so no add_one_openbrace
    const int depth = check_brace_balance(curr_char, false, false);

    if (awaiting_open) {
        if (curr_char != '{') { // EDGE CASE: missing argument
            DIE("Killed in IF_THEN - %s", "missing then statement");
        }
        awaiting_open = false;
        return ARG2; // the outer brace itself is not stored
    }

    if (curr_char == '}' && depth == 0) {
        check_brace_balance(curr_char, true, false);
        awaiting_open = true; // reset for the next \if
        return ARG1;
    }

    string_append_char(arg2_buffer, curr_char);
    return ARG2;
}
/**
 * \if, else-branch: stores the branch text in arg3_buffer without its outer
 * braces. On the matching closing brace the branch chosen in ctx->ARG is
 * pushed (reversed) onto comment_processed_buffer, the buffers are cleared
 * and ctx->ARG returns to NONE.
 *
 * Terminates the program if the branch does not start with '{'.
 *
 * @param curr_char                character being processed
 * @param arg3_buffer              receives the else-branch text
 * @param macro_buffer             cleared when the \if is finished
 * @param comment_processed_buffer expansion stack that receives the chosen branch
 * @param ctx                      parser context (ARG and arg2_buffer are read)
 * @return ARG1 while the branch is being read, PLAINTEXT once the \if is done
 */
enum state IF_ELSE(char curr_char, string_t* arg3_buffer, string_t *macro_buffer, string_t *comment_processed_buffer, parser_context *ctx) {
    static bool awaiting_open = true; // true until the outer '{' has been consumed

    // this state reads its own opening brace, so no add_one_openbrace
    const int depth = check_brace_balance(curr_char, false, false);
    string_t *then_text = ctx->arg2_buffer;

    if (awaiting_open) {
        if (curr_char != '{') {
            DIE("Killed in IF_ELSE - %s", "missing else statement");
        }
        awaiting_open = false;
        return ARG1; // the outer brace itself is not stored
    }

    if (curr_char != '}' || depth != 0) {
        string_append_char(arg3_buffer, curr_char);
        return ARG1;
    }

    // both branches were stored; push only the selected one
    switch (ctx->ARG) {
    case ELSE:
        string_push_reversed(comment_processed_buffer, arg3_buffer);
        break;
    case THEN:
        string_push_reversed(comment_processed_buffer, then_text);
        break;
    default:
        break;
    }

    // cleanup
    string_clear(macro_buffer);
    string_clear(then_text);
    string_clear(arg3_buffer);
    ctx->ARG = NONE;
    check_brace_balance(curr_char, true, false);
    awaiting_open = true;
    return PLAINTEXT;
}
// \IFDEF ///////////////////////////////////////////////////////////////////////////////
/**
 * \ifdef, condition argument: collects a macro name in ctx->arg1_buffer. On
 * the closing brace ctx->ARG is set to THEN when a macro with that name is
 * defined and to ELSE otherwise.
 *
 * Terminates the program when the name is empty or contains a character that
 * is not alphanumeric.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG3 while the name is being read, ARG2 once it is complete
 */
enum state IFDEF_COND(char curr_char, parser_context *ctx) {
    static bool first_char = true; // true until a name character has been stored

    // the opening brace was read in ESCAPE_CHAR, hence add_one_openbrace
    int balance = check_brace_balance(curr_char, false, true);

    // <ctype.h> functions need a value representable as unsigned char
    bool not_alnum = isalnum((unsigned char)curr_char) == 0;

    // EX: /ifdef{wro}ng} but allow /ifdef{wrong} (closing brace accepted)
    if ((not_alnum && balance != 0)
        || (curr_char == '}' && first_char)) { // empty COND argument
        DIE("%c is not alphanumeric", curr_char);
    }

    if (curr_char == '}') {
        if (list_retrieve(ctx->macro_ds, ctx->arg1_buffer->data)) {
            ctx->ARG = THEN;
        } else {
            // not defined macro
            ctx->ARG = ELSE;
        }
        // re-arm the empty-name check; without this only the first \ifdef
        // in the input was ever checked for an empty condition
        first_char = true;
        return ARG2;
    }

    string_append_char(ctx->arg1_buffer, curr_char);
    first_char = false;
    return ARG3;
}
/**
 * \ifdef, then-branch: stores the branch text in ctx->arg2_buffer without its
 * outer braces. The choice between the branches is made later in IFDEF_ELSE.
 *
 * Terminates the program if the branch does not start with '{'.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG2 while the branch is being read, ARG1 once it is complete
 */
enum state IFDEF_THEN(char curr_char, parser_context *ctx) {
    static bool awaiting_open = true; // true until the outer '{' has been consumed

    // this state reads its own opening brace, so no add_one_openbrace
    const int depth = check_brace_balance(curr_char, false, false);

    if (awaiting_open) {
        if (curr_char == '{') {
            // the outer brace itself is not stored
            awaiting_open = false;
            return ARG2;
        }
        // EDGE CASE: missing argument
        DIE("Killed in IF_THEN - %s", "missing then statement");
    }

    const bool branch_closed = (curr_char == '}') && (depth == 0);
    if (!branch_closed) {
        awaiting_open = false;
        string_append_char(ctx->arg2_buffer, curr_char);
        return ARG2;
    }

    check_brace_balance(curr_char, true, false);
    awaiting_open = true; // reset for the next \ifdef
    return ARG1;
}
/**
 * \ifdef, else-branch: stores the branch text in ctx->arg3_buffer without its
 * outer braces. On the matching closing brace the branch chosen in ctx->ARG
 * is pushed (reversed) onto the expansion stack; the macro and argument
 * buffers are then cleared and ctx->ARG returns to NONE.
 *
 * Terminates the program if the branch does not start with '{'.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG1 while the branch is being read, PLAINTEXT once the \ifdef is done
 */
enum state IFDEF_ELSE(char curr_char, parser_context *ctx) {
    static bool awaiting_open = true; // true until the outer '{' has been consumed

    const int depth = check_brace_balance(curr_char, false, false);

    if (awaiting_open) {
        if (curr_char == '{') {
            awaiting_open = false;
            return ARG1; // don't write opening brace
        }
        DIE("Killed in IF_ELSE - %s", "missing else statement");
    }

    const bool branch_closed = (curr_char == '}') && (depth == 0);
    if (!branch_closed) {
        awaiting_open = false;
        string_append_char(ctx->arg3_buffer, curr_char);
        return ARG1;
    }

    // both branches were stored; pick the one the condition selected
    const string_t *chosen = NULL;
    if (ctx->ARG == ELSE) {
        chosen = ctx->arg3_buffer;
    } else if (ctx->ARG == THEN) {
        chosen = ctx->arg2_buffer;
    }
    if (chosen != NULL) {
        string_push_reversed(ctx->comment_processed_buffer, chosen);
    }

    string_clear(ctx->macro_buffer);
    string_clear(ctx->arg2_buffer);
    string_clear(ctx->arg3_buffer);
    string_clear(ctx->arg1_buffer);
    ctx->ARG = NONE;
    check_brace_balance(curr_char, true, false);
    awaiting_open = true;
    return PLAINTEXT;
}
// \INCLUDE ///////////////////////////////////////////////////////////////////////////////
/**
 * \include: collects the file path in ctx->arg1_buffer. On the closing brace
 * the file is read and its contents are pushed (reversed) onto the expansion
 * stack, so they are processed in place of the macro call.
 *
 * Terminates the program if the file cannot be read.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG1 while the path is being read, PLAINTEXT once the file is pushed
 */
enum state INCLUDE_PATH(char curr_char, parser_context *ctx) {
    string_t *path = ctx->arg1_buffer;

    if (curr_char != '}') {
        string_append_char(path, curr_char);
        return ARG1;
    }

    string_t *file_text = read_file(path->data);
    if (file_text == NULL) {
        DIE("cannot read %s", path->data);
    }

    // the file's text replaces the macro call on the stack
    string_push_reversed(ctx->comment_processed_buffer, file_text);
    string_destroy(file_text);

    string_clear(path);
    string_clear(ctx->macro_buffer);
    return PLAINTEXT;
}
/**
 * Reads an entire file into a newly created string.
 *
 * @param path file to read
 * @return a NUL-terminated string holding the file's contents, which the
 *         caller must release with string_destroy; NULL if the file cannot be
 *         opened, its size cannot be determined, allocation fails, or fewer
 *         bytes than expected are read
 */
string_t *read_file(const char *path) {
    FILE *file = fopen(path, "r");
    if (!file) {
        return NULL;
    }

    // determine the size by seeking to the end; every step can fail
    // (e.g. on a non-seekable stream), so each result is checked
    long file_size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        file_size = ftell(file);
    }
    if (file_size < 0 || fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        return NULL;
    }

    string_t *content = string_buffer_create();
    if (!content) {
        fclose(file);
        return NULL;
    }

    // ensure string buffer is big enough for the text plus terminator
    const size_t wanted = (size_t)file_size;
    while (content->capacity < wanted + 1) {
        string_grow(content);
    }

    const size_t bytes_read = fread(content->data, 1, wanted, file);
    fclose(file);
    if (bytes_read != wanted) {
        // discrepancy in read --> error
        string_destroy(content);
        return NULL;
    }

    // null terminate and update size
    content->data[wanted] = '\0';
    content->size = wanted;
    return content;
}
// \EXPANDAFTER ///////////////////////////////////////////////////////////////////////////////
/**
 * \expandafter, BEFORE argument: stores the text up to the matching closing
 * brace in ctx->expaf_before_buffer (the opening brace was already consumed).
 * The text is only stored here; it is used by EXPANDAFTER_AFTER.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG2 while BEFORE is being read, ARG1 once it is complete
 */
enum state EXPANDAFTER_BEFORE(char curr_char, parser_context *ctx) {
    const int depth = check_brace_balance(curr_char, false, true);
    const bool before_closed = (curr_char == '}') && (depth == 0);

    if (before_closed) {
        return ARG1; // closing brace is not stored
    }

    string_append_char(ctx->expaf_before_buffer, curr_char);
    return ARG2;
}
/**
 * \expandafter, AFTER argument: stores the text between the outer braces in
 * ctx->expaf_after_buffer. On the matching closing brace the AFTER text is
 * reversed and run through expansion_stack, then BEFORE followed by whatever
 * remains in the AFTER buffer is pushed (reversed) onto the main expansion
 * stack and all scratch buffers are cleared.
 *
 * NOTE(review): expansion_stack consumes the AFTER buffer down to size 0, and
 * the handlers it invokes write to ctx->plaintext_buffer and
 * ctx->comment_processed_buffer rather than back into the AFTER buffer --
 * confirm this is the intended data flow.
 *
 * Terminates the program if the argument does not start with '{'.
 *
 * @param curr_char character being processed
 * @param ctx       parser context
 * @return ARG1 while AFTER is being read, PLAINTEXT once the macro is done
 */
enum state EXPANDAFTER_AFTER(char curr_char, parser_context *ctx) {
    static bool awaiting_open = true; // true until the outer '{' has been consumed

    const int depth = check_brace_balance(curr_char, false, false);

    if (awaiting_open) {
        if (curr_char == '{') {
            awaiting_open = false;
            return ARG1; // don't write opening brace
        }
        DIE("Killed in EXPANDAFTER_AFTER - %s", "missing after statement");
    }

    const bool after_closed = (curr_char == '}') && (depth == 0);
    if (!after_closed) {
        awaiting_open = false;
        string_append_char(ctx->expaf_after_buffer, curr_char);
        return ARG1;
    }

    string_t *before = ctx->expaf_before_buffer;
    string_t *after = ctx->expaf_after_buffer;
    string_t *joined = ctx->arg1_buffer;

    // expand AFTER first: reverse it into stack order and run the machine
    string_reverse(after);
    string_clear(ctx->macro_buffer); // clear for use in recursive call
    expansion_stack(ctx, after);

    // BEFORE + AFTER, pushed back onto the main stack
    string_append_string(joined, before);
    string_append_string(joined, after);
    string_push_reversed(ctx->comment_processed_buffer, joined);

    // cleanup
    string_clear(ctx->macro_buffer);
    string_clear(joined);
    string_clear(before);
    string_clear(after);
    check_brace_balance(curr_char, true, false);
    awaiting_open = true;
    return PLAINTEXT;
}
//---------------------------------------------------------
// state machine (driver function)
//---------------------------------------------------------
/**
 * Advances the macro state machine by one input character.
 *
 * States handled here:
 *   PLAINTEXT   - copy characters to ctx->plaintext_buffer; a backslash
 *                 switches to ESCAPE_CHAR.
 *   ESCAPE_CHAR - collect a macro name in ctx->macro_buffer until '{', then
 *                 choose the argument state for that macro (built-in or
 *                 user-defined via ctx->macro_ds).
 *   ARG3/ARG2/ARG1 - forward the character to the handler that matches the
 *                 macro name currently stored in ctx->macro_buffer.
 *   PROCESSED   - returned unchanged.
 *   anything else yields ERROR.
 *
 * Fatal conditions are reported through DIE().
 *
 * @param curr_char  character currently being processed
 * @param curr_state state the machine is in for this character
 * @param ctx        parser context (buffers, macro list, current macro)
 * @return the state to use for the next character
 */
enum state macro_process_text(char curr_char, enum state curr_state, parser_context *ctx) {
    // True only while a macro name is being collected in ESCAPE_CHAR.
    // Kept at function scope: a declaration may not directly follow a case
    // label before C23.
    static bool startedName = false;

    string_t *plaintext_buffer = ctx->plaintext_buffer;
    string_t *macro_buffer = ctx->macro_buffer;
    array_list *macro_ds = ctx->macro_ds;
    macro *m = ctx->currentMacro;

    switch (curr_state) {
    case PLAINTEXT:
        if (curr_char == '\\') {
            return ESCAPE_CHAR;
        }
        string_append_char(plaintext_buffer, curr_char);
        return PLAINTEXT;

    case ESCAPE_CHAR:
        /*
         * \ + whitespace (no name yet) -> back to plaintext
         * \MacroName {}                -> DIE
         * \{ (empty name)              -> DIE
         */
        if (curr_char == '%') {
            // escaped comment marker (\%): emit a literal '%'
            string_append_char(plaintext_buffer, '%');
            startedName = false;
            string_clear(macro_buffer);
            return PLAINTEXT;
        }

        // <ctype.h> functions require a value representable as unsigned char
        if (isspace((unsigned char)curr_char)) {
            if (!startedName) {
                return PLAINTEXT;
            }
            // whitespace in the middle of a macro name
            DIE("expected {, found '%c'", curr_char);
            return ESCAPE_CHAR;
        }

        if (curr_char == '{') {
            if (!startedName && macro_buffer->size == 0) {
                DIE("No macro name given before %c", curr_char); // empty macro name
            }
            // The name is complete. Clear the flag on every path so a stale
            // value cannot leak into the next escape sequence.
            startedName = false;

            char *name = macro_buffer->data;
            if (strcmp(name, "def") == 0) {
                // register an empty macro now; DEF_NAME/DEF_VALUE fill it in
                macro *new_macro = macro_create();
                list_append(macro_ds, new_macro);
                return ARG2;
            }
            if (strcmp(name, "undef") == 0) {
                return ARG1;
            }
            if (strcmp(name, "if") == 0 || strcmp(name, "ifdef") == 0) {
                return ARG3;
            }
            if (strcmp(name, "include") == 0) {
                return ARG1;
            }
            if (strcmp(name, "expandafter") == 0) {
                return ARG2;
            }

            // user-defined macro: look it up once
            macro *found = list_retrieve(macro_ds, name);
            if (found) {
                ctx->currentMacro = found;
                if (!found->has_pound) {
                    // value has no '#': the argument is read but ignored
                    ctx->skip_arg = true;
                }
                return ARG1;
            }

            DIE("%s not defined", macro_buffer->data);
            return ESCAPE_CHAR;
        }

        // any other character is part of the macro name
        string_append_char(macro_buffer, curr_char);
        startedName = true;
        return ESCAPE_CHAR;

    case ARG3:
        if (strcmp(macro_buffer->data, "if") == 0) {
            return IF_COND(curr_char, ctx);
        }
        if (strcmp(macro_buffer->data, "ifdef") == 0) {
            return IFDEF_COND(curr_char, ctx);
        }
        DIE("Did not enter any cases in ARG3. %s", "ERROR.");
        return ERROR; // never fall through into ARG2

    case ARG2:
        if (strcmp(macro_buffer->data, "def") == 0) {
            return DEF_NAME(macro_ds, curr_char, ctx);
        }
        if (strcmp(macro_buffer->data, "if") == 0) {
            return IF_THEN(curr_char, ctx->arg2_buffer, macro_buffer, ctx->comment_processed_buffer, ctx);
        }
        if (strcmp(macro_buffer->data, "ifdef") == 0) {
            return IFDEF_THEN(curr_char, ctx);
        }
        if (strcmp(macro_buffer->data, "expandafter") == 0) {
            return EXPANDAFTER_BEFORE(curr_char, ctx);
        }
        DIE("Did not enter any cases in ARG2. %s", "ERROR.");
        return ARG1;

    case ARG1:
        // Order matters: def/undef are tested before skip_arg, and skip_arg
        // before the remaining built-ins.
        if (strcmp(macro_buffer->data, "def") == 0) {
            return DEF_VALUE(curr_char, curr_state, macro_ds, macro_buffer);
        }
        if (strcmp(macro_buffer->data, "undef") == 0) {
            return UNDEF_VALUE(curr_char, ctx);
        }
        if (ctx->skip_arg) {
            // macro was defined without '#': discard its argument
            return USERDEF_IGNORE(curr_char, ctx);
        }
        if (strcmp(macro_buffer->data, "if") == 0) {
            return IF_ELSE(curr_char, ctx->arg1_buffer, macro_buffer, ctx->comment_processed_buffer, ctx);
        }
        if (strcmp(macro_buffer->data, "ifdef") == 0) {
            return IFDEF_ELSE(curr_char, ctx);
        }
        if (strcmp(macro_buffer->data, "include") == 0) {
            return INCLUDE_PATH(curr_char, ctx);
        }
        if (strcmp(macro_buffer->data, "expandafter") == 0) {
            return EXPANDAFTER_AFTER(curr_char, ctx);
        }
        // default: user-defined macro whose value contains '#'
        return USERDEF_VALUE(curr_char, m, ctx);

    case PROCESSED:
        return PROCESSED;

    default:
        return ERROR;
    }
}
Leave a Comment