(* Paste-site metadata, not part of the program:
   Untitled / unknown / plain_text / 8 months ago / 2.9 kB / 6 / Indexable *)
open TokenTypes
(* Raised by [tokenize_helper] when the remaining input does not begin with
   any whitespace, integer, boolean, identifier or fixed token. *)
exception InvalidInputException
(* Define regexes for complex tokens.
   The patterns are written in Perl syntax, where alternation is a bare bar;
   a backslash followed by a bar would match a literal bar character instead.
   Every pattern starts with a caret so that it can only match at the front
   of the input still to be tokenized, never somewhere in the middle of it. *)
let int_re = Re.Perl.compile_pat "^-?[0-9]+"
let bool_re = Re.Perl.compile_pat "^(true|false)"
let id_re = Re.Perl.compile_pat "^[a-zA-Z][a-zA-Z0-9]*"
let whitespace_re = Re.Perl.compile_pat "^[ \t\n]+"
(* Fixed-spelling tokens, each paired with its constructor.
   Operators come first, with every two-character operator listed ahead of
   the one-character operator it begins with; the reserved words follow.
   The list is scanned by [match_fixed] and used as an association list for
   the reserved-word lookup in [tokenize_helper]. *)
let token_regexes =
  let operators = [
    "==", Tok_Equal;
    "!=", Tok_NotEqual;
    ">=", Tok_GreaterEqual;
    "<=", Tok_LessEqual;
    "||", Tok_Or;
    "&&", Tok_And;
    "=", Tok_Assign;
    ">", Tok_Greater;
    "<", Tok_Less;
    "!", Tok_Not;
    ";", Tok_Semi;
    "(", Tok_LParen;
    ")", Tok_RParen;
    "{", Tok_LBrace;
    "}", Tok_RBrace;
    "+", Tok_Add;
    "-", Tok_Sub;
    "*", Tok_Mult;
    "/", Tok_Div;
    "^", Tok_Pow;
  ] in
  let keywords = [
    "int", Tok_Int_Type;
    "bool", Tok_Bool_Type;
    "printf", Tok_Print;
    "main", Tok_Main;
    "if", Tok_If;
    "else", Tok_Else;
    "for", Tok_For;
    "from", Tok_From;
    "to", Tok_To;
    "while", Tok_While;
  ] in
  operators @ keywords
(* Helper to match any of the fixed tokens.
   [match_fixed str] returns [Some (tok, spelling)] for the longest entry of
   [token_regexes] whose spelling is a prefix of [str], or [None] when no
   entry is a prefix.
   Spellings are compared as plain text rather than compiled as patterns:
   several of them (the parentheses, plus, star, caret, double bar) are regex
   metacharacters, so compiling them would either fail or match the wrong
   text. Picking the longest prefix keeps the result independent of the order
   of the list. *)
let match_fixed str =
  let is_prefix lit =
    let n = String.length lit in
    n <= String.length str && String.equal (String.sub str 0 n) lit
  in
  let keep_longest best (lit, tok) =
    match best with
    (* An earlier hit that is at least as long always wins. *)
    | Some (_, prev) when String.length prev >= String.length lit -> best
    | Some _ | None -> if is_prefix lit then Some (tok, lit) else best
  in
  List.fold_left keep_longest None token_regexes
(* Recursive tokenizer.
   [tokenize_helper str] reads tokens from the front of [str], one per
   recursive call, and returns them in order followed by [EOF].
   Surrounding blanks are removed with [String.trim] before each token.
   A maximal run of letters and digits is classified as a whole word: a
   boolean literal if [bool_re] matches all of it, otherwise a reserved word
   from [token_regexes], otherwise an identifier. This keeps a name that
   merely starts with a keyword or boolean in one piece.
   Raises [InvalidInputException] when the remaining input starts with no
   known token, or when an integer literal does not fit in [int]. *)
let rec tokenize_helper str =
  (* Text matched by [re] at offset 0 of [s], if any. Checking the start
     offset makes this correct whether or not [re] is anchored itself. *)
  let at_start re s =
    match Re.exec_opt re s with
    | Some g when Re.Group.start g 0 = 0 -> Some (Re.Group.get g 0)
    | Some _ | None -> None
  in
  let str = String.trim str in
  (* Input left over once [matched], a prefix of [str], is consumed. *)
  let after matched =
    let len = String.length matched in
    String.sub str len (String.length str - len)
  in
  if str = "" then [EOF]
  else
    (* After [String.trim] there is normally nothing for this case to skip;
       it is kept as a safety net for leading blanks. *)
    match at_start whitespace_re str with
    | Some blanks -> tokenize_helper (after blanks)
    | None -> (
      match at_start int_re str with
      | Some digits -> (
        (* A literal too large for [int] is reported as invalid input
           instead of escaping as [Failure]. *)
        match int_of_string_opt digits with
        | Some n -> Tok_Int n :: tokenize_helper (after digits)
        | None -> raise InvalidInputException)
      | None -> (
        match at_start id_re str with
        | Some word ->
          let tok =
            if at_start bool_re word = Some word then Tok_Bool (word = "true")
            else (
              match List.assoc_opt word token_regexes with
              | Some keyword -> keyword
              | None -> Tok_ID word)
          in
          tok :: tokenize_helper (after word)
        | None -> (
          match match_fixed str with
          | Some (tok, matched) -> tok :: tokenize_helper (after matched)
          | None -> raise InvalidInputException)))
(* Entry point: turn [str] into its list of tokens by handing it to
   [tokenize_helper]. *)
let tokenize str = str |> tokenize_helper
(* Paste-site page text, not part of the program:
   Editor is loading... / Leave a Comment *)