Abc
unknown
plain_text
9 months ago
4.3 kB
6
Indexable
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
// Intended cap on identifier length; not referenced anywhere in the visible code.
#define MAX_IDENTIFIER_LENGTH 32
// Size in bytes of the Token.value buffer, terminating NUL included.
#define MAX_TOKEN_LENGTH 100
// Categories the lexer assigns to a lexeme.
// The enumerator order matters: print_token indexes its label table by this value.
typedef enum {
TOKEN_UNKNOWN, // unrecognized character; also what get_next_token returns at end of input
TOKEN_KEYWORD, // identifier-shaped word that appears in the keywords list
TOKEN_IDENTIFIER, // letters, digits and '_' starting with a letter or '_', not a keyword
TOKEN_NUMBER, // run of digits, optionally followed by '.' and more digits
TOKEN_OPERATOR, // a single character from the operators set
TOKEN_DELIMITER, // a single character from the delimiters set
TOKEN_COMMENT // has a label in print_token, but the visible lexer skips comments and never returns it
} TokenType;
// Reserved words recognized by is_keyword.
// The trailing NULL is the end-of-list sentinel that is_keyword stops on.
const char *keywords[] = {
"int", "float", "if", "else", "while", "return", NULL
};
// Character sets used by get_next_token to classify single-character tokens.
// Each string is scanned one character at a time up to its terminating NUL.
const char operators[] = "+-*/%=<>!";
const char delimiters[] = "(),;{}[]";
// One lexeme produced by get_next_token.
typedef struct {
TokenType type; // classification of the lexeme
char value[MAX_TOKEN_LENGTH]; // lexeme text as a NUL-terminated string; empty at end of input
} Token;
// Report whether str exactly matches one of the entries in the keywords list.
// Returns 1 on a match and 0 otherwise; the comparison is case-sensitive.
int is_keyword(const char *str) {
    const char **cursor = keywords;

    // Walk the list until the NULL sentinel that terminates it.
    while (*cursor != NULL) {
        if (strcmp(str, *cursor) == 0) {
            return 1;
        }
        cursor++;
    }
    return 0;
}
// Append c to buf when there is still room for it plus the terminating NUL.
// Characters beyond the capacity are dropped so a long lexeme cannot overrun buf.
static void lex_append(char *buf, size_t cap, size_t *len, int c) {
    if (*len + 1 < cap) {
        buf[*len] = (char)c;
        (*len)++;
    }
}

// Consume the remainder of a line comment whose leading "//" was already read.
static void lex_skip_line_comment(FILE *file) {
    int c;
    do {
        c = fgetc(file);
    } while (c != EOF && c != '\n');
}

// Consume the remainder of a block comment whose leading "/*" was already read.
// Tracking the previous character lets runs such as "**/" close the comment.
static void lex_skip_block_comment(FILE *file) {
    int prev = 0;
    int c;
    while ((c = fgetc(file)) != EOF) {
        if (prev == '*' && c == '/') {
            return;
        }
        prev = c;
    }
}

// Read the next token from file.
//
// Whitespace, "//" line comments and "/* ... */" block comments are skipped.
// Returns:
//   - TOKEN_KEYWORD / TOKEN_IDENTIFIER for a word made of letters, digits, '_'
//   - TOKEN_NUMBER for digits with an optional '.' and fractional digits
//   - TOKEN_OPERATOR / TOKEN_DELIMITER for one character from those sets
//   - TOKEN_UNKNOWN holding the character for anything else
//   - TOKEN_UNKNOWN with an empty value at end of input
// Lexemes longer than the value buffer are truncated to fit; the surplus
// characters are still consumed from the stream.
Token get_next_token(FILE *file) {
    Token token;
    size_t len = 0;
    int c; // int, not char: fgetc returns EOF out of band

    token.type = TOKEN_UNKNOWN;
    token.value[0] = '\0';

    // Skip any mix of whitespace and comments without recursing.
    for (;;) {
        do {
            c = fgetc(file);
        } while (c != EOF && isspace(c));

        if (c == EOF) {
            return token;
        }
        if (c != '/') {
            break;
        }

        // Look one character ahead without losing the '/' itself.
        int next = fgetc(file);
        if (next == '/') {
            lex_skip_line_comment(file);
        } else if (next == '*') {
            lex_skip_block_comment(file);
        } else {
            // A plain '/' operator: give the look-ahead character back.
            if (next != EOF) {
                ungetc(next, file);
            }
            break;
        }
    }

    // Identifiers and keywords
    if (isalpha(c) || c == '_') {
        do {
            lex_append(token.value, sizeof token.value, &len, c);
            c = fgetc(file);
        } while (c != EOF && (isalnum(c) || c == '_'));
        token.value[len] = '\0';
        if (c != EOF) {
            ungetc(c, file); // first character that is not part of the word
        }
        token.type = is_keyword(token.value) ? TOKEN_KEYWORD : TOKEN_IDENTIFIER;
        return token;
    }

    // Numbers: integer part, then an optional '.' and fractional digits
    if (isdigit(c)) {
        do {
            lex_append(token.value, sizeof token.value, &len, c);
            c = fgetc(file);
        } while (isdigit(c));
        if (c == '.') {
            do {
                lex_append(token.value, sizeof token.value, &len, c);
                c = fgetc(file);
            } while (isdigit(c));
        }
        token.value[len] = '\0';
        if (c != EOF) {
            ungetc(c, file);
        }
        token.type = TOKEN_NUMBER;
        return token;
    }

    // Everything else is a single-character token.
    token.value[0] = (char)c;
    token.value[1] = '\0';

    // strchr also matches the terminating NUL, so exclude c == '\0' explicitly.
    if (c != '\0' && strchr(operators, c) != NULL) {
        token.type = TOKEN_OPERATOR;
    } else if (c != '\0' && strchr(delimiters, c) != NULL) {
        token.type = TOKEN_DELIMITER;
    } else {
        token.type = TOKEN_UNKNOWN;
    }
    return token;
}
// Write one token to stdout as a "Type: ... Value: ..." line.
// The label table is indexed by token.type, so its order must mirror TokenType.
void print_token(Token token) {
    static const char *const labels[] = {
        "Unknown", "Keyword", "Identifier", "Number",
        "Operator", "Delimiter", "Comment"
    };
    const char *label = labels[token.type];

    printf("Type: %-12s Value: %s\n", label, token.value);
}
// Main function to test the lexical analyzer
int main() {
FILE *file = fopen("test_program.c", "r");
if (file == NULL) {
perror("Error opening file");
return 1;
}
Token token;
while ((token = get_next_token(file)).type != TOKEN_UNKNOWN) {
print_token(token);
}
fclose(file);
return 0;
}
Editor is loading...
Leave a Comment