Abc
/*
 * Minimal lexical analyzer for a small C-like language.
 *
 * Reads characters from a stream and groups them into keywords,
 * identifiers, numbers, operators and delimiters. Whitespace and both
 * comment styles are skipped. The driver in main() tokenizes the file
 * "test_program.c" and prints one line per token.
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

#define MAX_IDENTIFIER_LENGTH 32
#define MAX_TOKEN_LENGTH 100

/* Token categories. TOKEN_UNKNOWN doubles as the end-of-input marker
 * when the token's value is the empty string. */
typedef enum {
    TOKEN_UNKNOWN,
    TOKEN_KEYWORD,
    TOKEN_IDENTIFIER,
    TOKEN_NUMBER,
    TOKEN_OPERATOR,
    TOKEN_DELIMITER,
    TOKEN_COMMENT
} TokenType;

/* NULL-terminated list of reserved words. */
const char *keywords[] = { "int", "float", "if", "else", "while", "return", NULL };

/* Single-character operators and delimiters recognised by the lexer. */
const char operators[] = "+-*/%=<>!";
const char delimiters[] = "(),;{}[]";

/* A token: its category plus its NUL-terminated text. */
typedef struct {
    TokenType type;
    char value[MAX_TOKEN_LENGTH];
} Token;

/*
 * Returns 1 if str exactly matches one of the entries in keywords,
 * 0 otherwise.
 */
int is_keyword(const char *str)
{
    for (int i = 0; keywords[i] != NULL; i++) {
        if (strcmp(str, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Appends c to token->value and keeps the buffer NUL-terminated.
 * Characters that do not fit are dropped, so an overlong lexeme is
 * truncated to MAX_TOKEN_LENGTH - 1 characters instead of overflowing.
 */
static void append_char(Token *token, int *index, int c)
{
    if (*index < MAX_TOKEN_LENGTH - 1) {
        token->value[(*index)++] = (char)c;
        token->value[*index] = '\0';
    }
}

/*
 * Reads and returns the next token from file.
 *
 * Whitespace, single-line comments and multi-line comments are skipped.
 * At end of input the returned token has type TOKEN_UNKNOWN and an empty
 * value; a character that fits no category is returned as TOKEN_UNKNOWN
 * with that character as its value.
 */
Token get_next_token(FILE *file)
{
    Token token;
    token.type = TOKEN_UNKNOWN;
    token.value[0] = '\0';

    int index = 0;
    int c; /* int, not char: must be able to hold EOF distinctly from any byte */

    /* Skip whitespace and comments until a token start (or EOF) is found. */
    for (;;) {
        do {
            c = fgetc(file);
        } while (c != EOF && isspace(c));

        if (c == EOF) {
            return token;
        }
        if (c != '/') {
            break;
        }

        /* Look one character ahead exactly once to classify the '/'. */
        int next = fgetc(file);

        if (next == '/') {
            /* Single-line comment: discard through the end of the line. */
            do {
                c = fgetc(file);
            } while (c != EOF && c != '\n');
            continue;
        }

        if (next == '*') {
            /* Multi-line comment: discard until a '*' directly followed by
             * '/'. Tracking the previous character handles runs of '*'
             * before the closing '/'. */
            int prev = 0;
            while ((c = fgetc(file)) != EOF) {
                if (prev == '*' && c == '/') {
                    break;
                }
                prev = c;
            }
            continue;
        }

        /* A lone '/' is the division operator: give the lookahead back.
         * ungetc(EOF, ...) fails and leaves the stream unchanged. */
        ungetc(next, file);
        break;
    }

    /* Identifiers and keywords: [A-Za-z_][A-Za-z0-9_]* */
    if (isalpha(c) || c == '_') {
        do {
            append_char(&token, &index, c);
            c = fgetc(file);
        } while (c != EOF && (isalnum(c) || c == '_'));
        ungetc(c, file); /* put back the first character past the lexeme */
        token.type = is_keyword(token.value) ? TOKEN_KEYWORD : TOKEN_IDENTIFIER;
        return token;
    }

    /* Numbers: digits, optionally followed by '.' and more digits. */
    if (isdigit(c)) {
        do {
            append_char(&token, &index, c);
            c = fgetc(file);
        } while (isdigit(c));

        if (c == '.') {
            do {
                append_char(&token, &index, c);
                c = fgetc(file);
            } while (isdigit(c));
        }

        ungetc(c, file);
        token.type = TOKEN_NUMBER;
        return token;
    }

    /* Everything else is a single-character token. */
    token.value[0] = (char)c;
    token.value[1] = '\0';

    /* The c != '\0' guard is needed because strchr would otherwise match
     * the terminating NUL of the lookup string. */
    if (c != '\0' && strchr(operators, c) != NULL) {
        token.type = TOKEN_OPERATOR;
    } else if (c != '\0' && strchr(delimiters, c) != NULL) {
        token.type = TOKEN_DELIMITER;
    } else {
        token.type = TOKEN_UNKNOWN;
    }
    return token;
}

/*
 * Prints the token's category name and text on one line of stdout.
 */
void print_token(Token token)
{
    /* Indexed by TokenType; order must match the enum declaration. */
    static const char *const token_type_str[] = {
        "Unknown", "Keyword", "Identifier", "Number",
        "Operator", "Delimiter", "Comment"
    };

    printf("Type: %-12s Value: %s\n", token_type_str[token.type], token.value);
}

/*
 * Tokenizes "test_program.c" and prints every token found.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(void)
{
    FILE *file = fopen("test_program.c", "r");
    if (file == NULL) {
        perror("Error opening file");
        return 1;
    }

    for (;;) {
        Token token = get_next_token(file);

        /* Only an empty TOKEN_UNKNOWN means end of input; an unrecognised
         * character is printed and scanning continues. */
        if (token.type == TOKEN_UNKNOWN && token.value[0] == '\0') {
            break;
        }
        print_token(token);
    }

    fclose(file);
    return 0;
}
Leave a Comment