(* Paste metadata (not part of the program): title Untitled; author unknown;
   format plain_text; posted 15 days ago; size 2.9 kB; 3; Indexable. *)
open TokenTypes

exception InvalidInputException

(* Regexes for the variable-text tokens.

   Every pattern is anchored with ^ so that it can only match at the very
   start of the string it is run on. The tokenizer strips a matched prefix of
   the same length from the front of its input, so a match found further
   along the string would make it drop the wrong characters.

   bool_re uses a plain | for alternation: in Perl syntax a backslash-escaped
   bar is a literal bar character, not an alternation. The trailing \b keeps
   a longer identifier that merely starts with true or false from being
   split into a boolean followed by an identifier. *)
let int_re = Re.Perl.compile_pat "^-?[0-9]+"
let bool_re = Re.Perl.compile_pat "^(true|false)\\b"
let id_re = Re.Perl.compile_pat "^[a-zA-Z][a-zA-Z0-9]*"
let whitespace_re = Re.Perl.compile_pat "^[ \t\n]+"

(* Table of fixed-text tokens, as (literal text, token) pairs.

   Order matters to any first-match scan over this list: every two-character
   operator is listed before the one-character operator that is its prefix,
   so the longest match is found first. Reserved words follow the operators. *)
let token_regexes =
  let operators =
    [
      ("==", Tok_Equal);
      ("!=", Tok_NotEqual);
      (">=", Tok_GreaterEqual);
      ("<=", Tok_LessEqual);
      ("||", Tok_Or);
      ("&&", Tok_And);
      ("=", Tok_Assign);
      (">", Tok_Greater);
      ("<", Tok_Less);
      ("!", Tok_Not);
      (";", Tok_Semi);
      ("(", Tok_LParen);
      (")", Tok_RParen);
      ("{", Tok_LBrace);
      ("}", Tok_RBrace);
      ("+", Tok_Add);
      ("-", Tok_Sub);
      ("*", Tok_Mult);
      ("/", Tok_Div);
      ("^", Tok_Pow);
    ]
  in
  let keywords =
    [
      ("int", Tok_Int_Type);
      ("bool", Tok_Bool_Type);
      ("printf", Tok_Print);
      ("main", Tok_Main);
      ("if", Tok_If);
      ("else", Tok_Else);
      ("for", Tok_For);
      ("from", Tok_From);
      ("to", Tok_To);
      ("while", Tok_While);
    ]
  in
  operators @ keywords

(* match_fixed str looks for a fixed-text token at the start of str.

   Returns Some (tok, text) for the first entry of token_regexes whose
   literal text is a prefix of str, where text is that literal; returns None
   when no entry is a prefix.

   The table entries are compared as plain strings rather than compiled as
   regexes: several of them (parentheses, +, *, ^, ||) are regex
   metacharacters and would either fail to compile or match something other
   than themselves. Scanning the table in order keeps its longest-first
   arrangement meaningful. *)
let match_fixed str =
  let str_len = String.length str in
  let is_prefix literal =
    let n = String.length literal in
    n <= str_len && String.sub str 0 n = literal
  in
  match List.find_opt (fun (literal, _) -> is_prefix literal) token_regexes with
  | Some (literal, tok) -> Some (tok, literal)
  | None -> None

(* tokenize_helper str splits str into a token list terminated by EOF.

   At each step the remaining input is trimmed of surrounding whitespace and
   the first rule that matches at its start wins:
     1. an integer literal (int_re), giving Tok_Int;
     2. a word (id_re), which becomes Tok_Bool for true / false, the keyword
        token if the whole word is listed in token_regexes, or Tok_ID;
     3. a fixed-text token found by match_fixed.

   Raises InvalidInputException when nothing matches at the start of the
   remaining input, or when an integer literal does not fit in an int.

   Tokens are accumulated in reverse and flipped once at the end, so the
   loop is tail-recursive. *)
let tokenize_helper str =
  (* The text [re] matches at offset 0 of [s], if any. Checking the start
     offset makes this a true prefix match whether or not [re] is anchored. *)
  let prefix_match re s =
    match Re.exec re s with
    | groups ->
        if Re.Group.start groups 0 = 0 then Some (Re.Group.get groups 0)
        else None
    | exception Not_found -> None
  in
  (* Classify a complete word so that a longer identifier which merely
     starts with a keyword or boolean is still one identifier. *)
  let classify_word word =
    match word with
    | "true" -> Tok_Bool true
    | "false" -> Tok_Bool false
    | _ ->
        (match List.assoc_opt word token_regexes with
         | Some keyword -> keyword
         | None -> Tok_ID word)
  in
  let rec loop acc input =
    (* Trimming removes any leading whitespace, so no separate whitespace
       rule is needed below. *)
    let input = String.trim input in
    if input = "" then List.rev (EOF :: acc)
    else
      let token, consumed =
        match prefix_match int_re input with
        | Some digits ->
            (match int_of_string_opt digits with
             | Some n -> (Tok_Int n, String.length digits)
             | None -> raise InvalidInputException)
        | None ->
            (match prefix_match id_re input with
             | Some word -> (classify_word word, String.length word)
             | None ->
                 (match match_fixed input with
                  | Some (tok, text) ->
                      (* An empty match would never shrink the input. *)
                      if text = "" then raise InvalidInputException
                      else (tok, String.length text)
                  | None -> raise InvalidInputException))
      in
      let rest = String.sub input consumed (String.length input - consumed) in
      loop (token :: acc) rest
  in
  loop [] str

(* Public entry point: tokenize str hands str to tokenize_helper and returns
   its result unchanged. *)
let tokenize str = str |> tokenize_helper
(* Paste-site page residue (not part of the program): Editor is loading...
   Leave a Comment *)