Untitled
unknown
plain_text
15 days ago
2.9 kB
3
Indexable
open TokenTypes

(** Raised when the remaining input does not begin with any valid token. *)
exception InvalidInputException

(* Regexes for the variable-length tokens.

   [int_re], [id_re] and [whitespace_re] start with [^] so that they only
   match at the very beginning of the string they are given: the lexer
   always consumes from the front of the remaining input, so a match found
   further along must not count.

   [bool_re] is anchored on both ends and is meant to be applied to an
   already-extracted word, so that e.g. "trueblue" is not taken for a
   boolean followed by an identifier. *)
let int_re = Re.Perl.compile_pat "^-?[0-9]+"
let bool_re = Re.Perl.compile_pat "^(true|false)$"
let id_re = Re.Perl.compile_pat "^[a-zA-Z][a-zA-Z0-9]*"

(* Same characters that [String.trim] strips: space, tab, newline,
   carriage return and form feed. *)
let whitespace_re = Re.Perl.compile_pat "^[ \t\n\r\012]+"

(* Table of fixed tokens: the literal source text paired with its token.
   The strings are literals, NOT regular expressions (several of them, such
   as "(", "+", "*", "||" and "^", are regex metacharacters).  The keyword
   entries are looked up by exact name once a whole word has been read. *)
let token_regexes =
  [
    ("==", Tok_Equal);
    ("!=", Tok_NotEqual);
    (">=", Tok_GreaterEqual);
    ("<=", Tok_LessEqual);
    ("||", Tok_Or);
    ("&&", Tok_And);
    ("=", Tok_Assign);
    (">", Tok_Greater);
    ("<", Tok_Less);
    ("!", Tok_Not);
    (";", Tok_Semi);
    ("(", Tok_LParen);
    (")", Tok_RParen);
    ("{", Tok_LBrace);
    ("}", Tok_RBrace);
    ("+", Tok_Add);
    ("-", Tok_Sub);
    ("*", Tok_Mult);
    ("/", Tok_Div);
    ("^", Tok_Pow);
    ("int", Tok_Int_Type);
    ("bool", Tok_Bool_Type);
    ("printf", Tok_Print);
    ("main", Tok_Main);
    ("if", Tok_If);
    ("else", Tok_Else);
    ("for", Tok_For);
    ("from", Tok_From);
    ("to", Tok_To);
    ("while", Tok_While);
  ]

(** [match_fixed str] finds the longest entry of [token_regexes] whose
    literal text is a prefix of [str].

    @return [Some (tok, text)] where [text] is the matched literal, or
            [None] when no entry is a prefix of [str]. *)
let match_fixed str =
  let is_prefix lit =
    let n = String.length lit in
    String.length str >= n && String.equal (String.sub str 0 n) lit
  in
  (* Keep the longest matching literal so the result does not depend on the
     order of the table ("==" must win over "="). *)
  let keep_longest best ((lit, _) as candidate) =
    if not (is_prefix lit) then best
    else
      match best with
      | Some (prev, _) when String.length prev >= String.length lit -> best
      | Some _ | None -> Some candidate
  in
  match List.fold_left keep_longest None token_regexes with
  | Some (lit, tok) -> Some (tok, lit)
  | None -> None

(** [tokenize_helper str] converts [str] into a list of tokens terminated
    by [EOF].

    At each position the lexer tries, in order: whitespace (skipped), an
    integer literal, a word (boolean literal, keyword or identifier), and
    finally a fixed symbol.  Because the integer rule is tried first and
    accepts a leading '-', "1-2" lexes as [Tok_Int 1; Tok_Int (-2)].

    @raise InvalidInputException if the remaining input starts with no
           valid token, or an integer literal does not fit in [int]. *)
let tokenize_helper str =
  (* Text matched by [re] at the front of [s], if any. *)
  let lexeme_at re s =
    match Re.exec_opt re s with
    | Some groups -> Some (Re.Group.get groups 0)
    | None -> None
  in
  let drop n s = String.sub s n (String.length s - n) in
  (* A whole word is a boolean literal, a reserved keyword or an identifier;
     deciding after the full word is read gives longest-match behaviour
     ("integer" is an identifier, not [Tok_Int_Type] followed by "eger"). *)
  let classify_word word =
    if Re.execp bool_re word then Tok_Bool (String.equal word "true")
    else
      match List.assoc_opt word token_regexes with
      | Some keyword -> keyword
      | None -> Tok_ID word
  in
  (* [acc] holds the tokens read so far in reverse order; the loop is
     tail-recursive so long inputs do not grow the stack. *)
  let rec loop acc rest =
    if String.length rest = 0 then List.rev (EOF :: acc)
    else
      match lexeme_at whitespace_re rest with
      | Some blank -> loop acc (drop (String.length blank) rest)
      | None -> (
          match lexeme_at int_re rest with
          | Some digits -> (
              match int_of_string_opt digits with
              | Some n ->
                  loop (Tok_Int n :: acc) (drop (String.length digits) rest)
              | None -> raise InvalidInputException)
          | None -> (
              match lexeme_at id_re rest with
              | Some word ->
                  loop (classify_word word :: acc)
                    (drop (String.length word) rest)
              | None -> (
                  match match_fixed rest with
                  | Some (tok, lit) ->
                      loop (tok :: acc) (drop (String.length lit) rest)
                  | None -> raise InvalidInputException)))
  in
  loop [] str

(** [tokenize str] is the lexer's entry point; see [tokenize_helper].

    @raise InvalidInputException on input that cannot be tokenized. *)
let tokenize str = tokenize_helper str
Editor is loading...
Leave a Comment