Abc

 avatar
unknown
plain_text
19 days ago
4.3 kB
4
Indexable
#include <stdio.h>

#include <ctype.h>

#include <string.h>
#include <stdlib.h>

// Declared identifier length limit. Nothing in the code visible here refers
// to it; identifiers are stored in Token.value like every other token.
#define MAX_IDENTIFIER_LENGTH 32
// Size in bytes of the Token.value buffer, which holds the token text as a
// NUL-terminated string.
#define MAX_TOKEN_LENGTH 100

// Token categories produced by the lexer.
// The numeric values are spelled out because print_token uses them as
// indices into its table of display names.
typedef enum {
    TOKEN_UNKNOWN    = 0,  // unrecognised character; with an empty value it marks end of input
    TOKEN_KEYWORD    = 1,  // identifier-shaped word that appears in keywords[]
    TOKEN_IDENTIFIER = 2,  // letter or '_' followed by letters, digits or '_'
    TOKEN_NUMBER     = 3,  // digits, optionally followed by '.' and more digits
    TOKEN_OPERATOR   = 4,  // a single character from operators[]
    TOKEN_DELIMITER  = 5,  // a single character from delimiters[]
    TOKEN_COMMENT    = 6   // declared, but get_next_token skips comments and never returns it
} TokenType;

// Reserved words that is_keyword() recognises.
// The trailing NULL is the end-of-list sentinel and must remain the last entry.
const char *keywords[] = {
    "int",
    "float",
    "if",
    "else",
    "while",
    "return",
    NULL
};

// Character classes for single-character tokens. Each array is a
// NUL-terminated string; adjacent literals are concatenated by the compiler,
// so the grouping below is purely for readability.

// Arithmetic characters first, then assignment/comparison/negation.
const char operators[] = "+-*/%" "=<>!";

// Parentheses, separators, braces, brackets.
const char delimiters[] = "()" ",;" "{}" "[]";

// Define a structure for Token
// One lexical token: its category plus the text that was read for it.
// Returned by value from get_next_token() and passed by value to print_token().
typedef struct {
    TokenType type;  // category of the token
    char value[MAX_TOKEN_LENGTH];  // token text as a NUL-terminated string; empty at end of input
} Token;

// Reports whether `str` exactly matches one of the entries in keywords[].
// The comparison is case-sensitive (strcmp). Returns 1 on a match, 0 otherwise.
int is_keyword(const char *str) {
    // Walk the table until its NULL sentinel.
    for (const char **entry = keywords; *entry != NULL; ++entry) {
        if (!strcmp(str, *entry)) {
            return 1;
        }
    }
    return 0;
}

// Appends `c` to token->value at position *index and advances *index.
// The character is dropped when storing it would leave no room for the
// terminating NUL, so an over-long lexeme is truncated instead of
// overflowing the buffer.
static void token_append(Token *token, size_t *index, int c) {
    if (*index + 1 < sizeof token->value) {
        token->value[*index] = (char)c;
        (*index)++;
    }
}

// Function to get the next token
//
// Reads characters from `file` and returns the next token:
//   - whitespace, `//` comments and `/* ... */` comments are skipped;
//   - a letter or '_' starts an identifier, reported as TOKEN_KEYWORD when
//     is_keyword() accepts it and TOKEN_IDENTIFIER otherwise;
//   - a digit starts a TOKEN_NUMBER (digits, optional '.', more digits);
//   - one character from operators[] / delimiters[] yields TOKEN_OPERATOR /
//     TOKEN_DELIMITER;
//   - any other character yields TOKEN_UNKNOWN with that character as value.
//
// End of input is reported as TOKEN_UNKNOWN with an empty value. (A NUL byte
// in the input produces the same result, since the value is a C string.)
// Lexemes longer than the value buffer are consumed in full but stored
// truncated.
Token get_next_token(FILE *file) {
    Token token;
    token.type = TOKEN_UNKNOWN;
    token.value[0] = '\0';

    size_t index = 0;
    // fgetc returns an int so that EOF is distinct from every byte value;
    // storing it in a char would either never equal EOF or confuse byte 0xFF
    // with EOF, depending on the signedness of char.
    int c;

    // Skip whitespace and comments. A loop (rather than recursion) keeps the
    // stack flat no matter how many comments follow each other.
    for (;;) {
        do {
            c = fgetc(file);
        } while (c != EOF && isspace(c));

        if (c != '/') {
            break;
        }

        // Look one character ahead without losing the '/' itself.
        int next = fgetc(file);
        if (next == '/') {
            // Single-line comment: discard through the end of the line.
            do {
                c = fgetc(file);
            } while (c != EOF && c != '\n');
        } else if (next == '*') {
            // Multi-line comment: remember the previous character so that a
            // run such as "**/" still terminates the comment.
            int prev = 0;
            while ((c = fgetc(file)) != EOF) {
                if (prev == '*' && c == '/') {
                    break;
                }
                prev = c;
            }
        } else {
            // A lone '/' is the division operator; return the lookahead.
            if (next != EOF) {
                ungetc(next, file);
            }
            break;
        }
    }

    // Handle end of file
    if (c == EOF) {
        return token;
    }

    // Handle identifiers and keywords
    if (isalpha(c) || c == '_') {
        do {
            token_append(&token, &index, c);
            c = fgetc(file);
        } while (isalnum(c) || c == '_');
        token.value[index] = '\0';
        if (c != EOF) {
            ungetc(c, file); // Put the last non-matching character back
        }
        token.type = is_keyword(token.value) ? TOKEN_KEYWORD : TOKEN_IDENTIFIER;
        return token;
    }

    // Handle numbers (integers and floating-point)
    if (isdigit(c)) {
        do {
            token_append(&token, &index, c);
            c = fgetc(file);
        } while (isdigit(c));
        if (c == '.') {
            do {
                token_append(&token, &index, c);
                c = fgetc(file);
            } while (isdigit(c));
        }
        token.value[index] = '\0';
        if (c != EOF) {
            ungetc(c, file);
        }
        token.type = TOKEN_NUMBER;
        return token;
    }

    // Everything else is a single-character token.
    token.value[0] = (char)c;
    token.value[1] = '\0';

    // strchr also matches the terminating NUL of its haystack, so a NUL byte
    // from the input must be excluded explicitly.
    if (c != '\0' && strchr(operators, c) != NULL) {
        token.type = TOKEN_OPERATOR;
    } else if (c != '\0' && strchr(delimiters, c) != NULL) {
        token.type = TOKEN_DELIMITER;
    } else {
        token.type = TOKEN_UNKNOWN;
    }
    return token;
}

// Writes one line to stdout describing `token`: the display name of its
// type, left-aligned and padded to 12 columns, followed by its text.
void print_token(Token token) {
    // Display names keyed by enumerator, so each label sits next to the
    // TokenType value it belongs to.
    static const char *const type_names[] = {
        [TOKEN_UNKNOWN]    = "Unknown",
        [TOKEN_KEYWORD]    = "Keyword",
        [TOKEN_IDENTIFIER] = "Identifier",
        [TOKEN_NUMBER]     = "Number",
        [TOKEN_OPERATOR]   = "Operator",
        [TOKEN_DELIMITER]  = "Delimiter",
        [TOKEN_COMMENT]    = "Comment"
    };
    const char *type_name = type_names[token.type];

    printf("Type: %-12s Value: %s\n", type_name, token.value);
}

// Main function to test the lexical analyzer
int main() {
    FILE *file = fopen("test_program.c", "r");
    if (file == NULL) {
        perror("Error opening file");
        return 1;
    }

    Token token;
    while ((token = get_next_token(file)).type != TOKEN_UNKNOWN) {
        print_token(token);
    }

    fclose(file);
    return 0;
}
Leave a Comment