Untitled

 avatar
unknown
plain_text
a year ago
6.3 kB
4
Indexable
/*
 *  The scanner definition for COOL.
 */

/*
 *  Stuff enclosed in %{ %} in the first section is copied verbatim to the
 *  output, so headers and global definitions are placed here to be visible
 * to the code in the file.  Don't remove anything that was here initially
 */

%{
#include <cool-parse.h>
#include <stringtab.h>
#include <utilities.h>

/* The compiler assumes these identifiers. */
/* Rename flex's default entry points to the cool_-prefixed names. */
#define yylval cool_yylval
#define yylex  cool_yylex

/* Max size of string constants; also the size of string_buf below. */
#define MAX_STR_CONST 1025
#define YY_NO_UNPUT   /* keep g++ happy */
/*
   The two statements below are here just so this program will compile.
   You may need to change or remove them on your final code.
   yywrap() expands to the constant 1 (per the flex manual, a non-zero
   yywrap() result means there is no further input after end-of-file).
   NOTE(review): YY_SKIP_YYWRAP presumably suppresses flex's own yywrap
   declaration -- confirm against the flex version in use.
*/
#define yywrap() 1
#define YY_SKIP_YYWRAP

extern FILE *fin; /* we read from this file */

/* define YY_INPUT so we read from the FILE fin:
 * This change makes it possible to use this scanner in
 * the Cool compiler.
 *
 * buf      - destination buffer supplied by the generated scanner
 * result   - receives the number of characters read (0 at end of file)
 * max_size - capacity of buf
 *
 * fread() returns an unsigned count and never a negative value, so a
 * failed read is detected through the stream's error indicator: a
 * zero-length read with ferror(fin) set is fatal, a zero-length read
 * without it is a normal end of file.
 */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	if ( ((result) = fread( (char*)(buf), sizeof(char), (max_size), fin)) == 0 \
	     && ferror(fin) ) \
		YY_FATAL_ERROR( "read() in flex scanner failed");

/*
 * string_buf holds the text of the string constant currently being
 * scanned; string_buf_ptr is the position in it where the string rules
 * write the terminating NUL before the buffer is added to the string table.
 */
char string_buf[MAX_STR_CONST]; /* to assemble string constants */
char *string_buf_ptr;

/* Line counter defined elsewhere; the rules increment it on newlines. */
extern int curr_lineno;
/* Defined elsewhere; not referenced by any rule in this file. */
extern int verbose_flag;

/*
 * Semantic value of the token being returned; the rules fill in its
 * symbol, boolean or error_msg member before returning a token code.
 */
extern YYSTYPE cool_yylval;

/*
 *  Add Your own definitions here
 */

/* Depth of (* ... *) comment nesting; 0 when not inside a comment. */
int nestedComments = 0;
/*
 * The comment rules spell the depth counter "commentLevel".  It is a
 * reference to nestedComments so that both names always denote the
 * same counter.
 */
int &commentLevel = nestedComments;
/* Not referenced by any rule in this file. */
std::string readString = "";

%}

/*
 * Define names for regular expressions here.
 */

/*
 * SINGLE_TOKENS: any one-character token.  Inside a character class
 * '|' is an ordinary character rather than alternation, so the members
 * are listed without separators; otherwise '|' itself would match.
 * QUOTES: the double-quote character that delimits string constants.
 */
SINGLE_TOKENS [{}():;@,.+\-*/=<~]
QUOTES \"

/*
 * Start conditions.
 *
 * MULTILINE_COMMENT, SINGLELINE_COMMENT and STRING are inclusive
 * (%START).  STR and TREAT_STR_ERROR are the names the string rules
 * actually use (<STR>{ ... } and BEGIN(TREAT_STR_ERROR)); they are
 * declared exclusive (%x) so that rules written without a start
 * condition are not active while the scanner is inside a string.
 * No rule in this file refers to STRING.
 */
%START MULTILINE_COMMENT
%START SINGLELINE_COMMENT
%START STRING
%x STR
%x TREAT_STR_ERROR


/* Multi-character operators, written as quoted literals. */
DARROW          "=>"
LE              "<="
ASSIGN          "<-"

/* A single decimal digit. */
DIGIT           [0-9]

/* One-character tokens, grouped by role. */
OPERATORS       [+\-*/]
PUNCTUATION     [;:.,]
SYMBOLS         [(){}@~<=]

/*
 * Identifiers: a type identifier starts with an upper-case letter, an
 * object identifier with a lower-case letter; both continue with
 * letters, digits and underscores.
 */
TYPEID          [A-Z][A-Za-z0-9_]*
OBJECTID        [a-z][A-Za-z0-9_]*

/* Keywords; every letter is accepted in either case. */
CLASS           [cC][lL][aA][sS][sS]
IF              [iI][fF]
THEN            [tT][hH][eE][nN]
ELSE            [eE][lL][sS][eE]
FI              [fF][iI]
IN              [iI][nN]
INHERITS        [iI][nN][hH][eE][rR][iI][tT][sS]
ISVOID          [iI][sS][vV][oO][iI][dD]
LET             [lL][eE][tT]
LOOP            [lL][oO][oO][pP]
POOL            [pP][oO][oO][lL]
WHILE           [wW][hH][iI][lL][eE]
CASE            [cC][aA][sS][eE]
ESAC            [eE][sS][aA][cC]
NEW             [nN][eE][wW]
OF              [oO][fF]
NOT             [nN][oO][tT]

/*
 * Boolean constants: the first letter must be lower case, the
 * remaining letters may be in either case.  These are definitions
 * (not rules) because rules are only legal after the %% separator;
 * the rules section refers to them as {TRUE} and {FALSE}.
 */
TRUE            t[rR][uU][eE]
FALSE           f[aA][lL][sS][eE]

%%

 /*
  *  Whitespace and line counting outside comments and strings.
  *  Both rules are restricted to INITIAL so that they can never take
  *  precedence over the rules of another start condition; rules for
  *  other start conditions count the newlines they consume themselves.
  */
<INITIAL>[ \t\r\f\v]+   { /* skip whitespace */ }

<INITIAL>\n             { curr_lineno++; }

 /*
  *  Comments.
  *
  *  "--" starts a comment that runs to the end of the line.
  *  (* ... *) comments may span several lines and may nest;
  *  nestedComments holds the current nesting depth and the scanner
  *  returns to INITIAL only when the outermost comment is closed.
  */

<INITIAL>"--"                   { BEGIN SINGLELINE_COMMENT; }

<SINGLELINE_COMMENT>[^\n]+      { /* discard the comment text */ }

<SINGLELINE_COMMENT>\n {
  curr_lineno++;
  BEGIN INITIAL;
}

<INITIAL,MULTILINE_COMMENT>"(*" {
  nestedComments += 1;
  BEGIN MULTILINE_COMMENT;
}

<MULTILINE_COMMENT>"*)" {
  nestedComments -= 1;
  if (nestedComments == 0) BEGIN INITIAL;
}

<MULTILINE_COMMENT>\n           { curr_lineno++; }

 /* One or more characters: a pattern that can match nothing must be avoided. */
<MULTILINE_COMMENT>[^\n(*]+     { }

 /* A lone '(' or '*' that is not part of a comment delimiter. */
<MULTILINE_COMMENT>[(*]         { }

<MULTILINE_COMMENT><<EOF>> {
  /* Reset the depth so a later scan does not start inside a comment. */
  nestedComments = 0;
  BEGIN INITIAL;

  yylval.error_msg = "EOF in comment";
  return (ERROR);
}

<INITIAL>{QUOTES} {
  /*
   *  String constants (C syntax).
   *  An opening quote resets the assembly buffer and switches to STR,
   *  whose rules collect the body of the string into string_buf.
   */
  string_buf_ptr = string_buf;
  BEGIN(STR);
}

<STR>{
  {QUOTES} {
    /* Closing quote: terminate the buffer and enter it in the string table. */
    *string_buf_ptr = '\0';
    cool_yylval.symbol = stringtable.add_string(string_buf);
    BEGIN(INITIAL);
    return (STR_CONST);
  }

  <<EOF>> {
    /* error_msg is a pointer: point it at the message, do not copy into it. */
    cool_yylval.error_msg = "EOF in string constant";
    BEGIN(INITIAL);
    return (ERROR);
  }

  (\0|\\\0) {
    cool_yylval.error_msg = "Null character in string";
    BEGIN(TREAT_STR_ERROR);
    return (ERROR);
  }

  \n {
    cool_yylval.error_msg = "Unterminated string constant";
    curr_lineno++;
    BEGIN(INITIAL);
    return (ERROR);
  }

  /*
   *  Escape sequence \c is accepted for all characters c. Except for
   *  \n \t \b \f, the result is c.  An escaped newline is part of the
   *  string but still starts a new source line.
   */
  \\(.|\n) {
    char ch = yytext[1];
    switch (ch) {
      case 'n':  ch = '\n'; break;
      case 't':  ch = '\t'; break;
      case 'b':  ch = '\b'; break;
      case 'f':  ch = '\f'; break;
      case '\n': curr_lineno++; break;
      default:   break;
    }
    /* One slot of string_buf is reserved for the terminating NUL. */
    if (string_buf_ptr - string_buf >= MAX_STR_CONST - 1) {
      cool_yylval.error_msg = "String constant too long";
      BEGIN(TREAT_STR_ERROR);
      return (ERROR);
    }
    *string_buf_ptr++ = ch;
  }

  /*
   *  Reads all other characters.  NUL is excluded from the run so that
   *  the null-character rule above sees it instead of it being copied.
   */
  [^\\\n\"\0]+ {
    if ((string_buf_ptr - string_buf) + yyleng > MAX_STR_CONST - 1) {
      cool_yylval.error_msg = "String constant too long";
      BEGIN(TREAT_STR_ERROR);
      return (ERROR);
    }
    for (char *src = yytext; src != yytext + yyleng; ++src) {
      *string_buf_ptr++ = *src;
    }
  }
}

<TREAT_STR_ERROR>{
  /*
   *  After an error inside a string, discard input up to the end of
   *  that string: its closing quote, or the first unescaped newline.
   */
  {QUOTES}      { BEGIN(INITIAL); }

  \n {
    curr_lineno++;
    BEGIN(INITIAL);
  }

  \\(.|\n)      { if (yytext[1] == '\n') curr_lineno++; }

  [^\\\n\"]+    { }
}

<INITIAL>"*)" {
  /*
   * A close-comment marker outside any comment is an error.
   * This rule stands where a second copy of the nested-comment rules
   * used to be; those rules already appear earlier in this section,
   * and flex always selects the earlier of two rules that match the
   * same text, so the copy could never be chosen.
   */
  yylval.error_msg = "Unmatched *)";
  return (ERROR);
}

 /*
  *  Multi-character operators: each returns its own token code.
  */
{DARROW}    { return DARROW; }
{ASSIGN}    { return ASSIGN; }
{LE}        { return LE; }

 /*
  *  Keywords: each returns the token code of the same name.
  */
{CLASS}     { return CLASS; }
{IF}        { return IF; }
{THEN}      { return THEN; }
{ELSE}      { return ELSE; }
{FI}        { return FI; }
{IN}        { return IN; }
{INHERITS}  { return INHERITS; }
{ISVOID}    { return ISVOID; }
{LET}       { return LET; }
{LOOP}      { return LOOP; }
{POOL}      { return POOL; }
{WHILE}     { return WHILE; }
{CASE}      { return CASE; }
{ESAC}      { return ESAC; }
{NEW}       { return NEW; }
{OF}        { return OF; }
{NOT}       { return NOT; }

{SYMBOLS}       { return yytext[0]; }

{PUNCTUATION}   { return yytext[0]; }

{OPERATORS}     { return yytext[0]; }

 /*
  *  Boolean constants.  The patterns are written out here so the rules
  *  do not depend on a named definition: the first letter must be
  *  lower case, the remaining letters may be in either case.
  */
t[rR][uU][eE]       { yylval.boolean = 1; return (BOOL_CONST); }
f[aA][lL][sS][eE]   { yylval.boolean = 0; return (BOOL_CONST); }

 /* Integer constants go in the integer table, identifiers in the id table. */
{DIGIT}+        { yylval.symbol = inttable.add_string(yytext); return (INT_CONST); }

{TYPEID}        { yylval.symbol = idtable.add_string(yytext); return (TYPEID); }
{OBJECTID}      { yylval.symbol = idtable.add_string(yytext); return (OBJECTID); }

 /*
  *  Any other character cannot begin a token.  Report it as an error
  *  whose message is the offending character, instead of letting
  *  flex's default rule copy it to the output.  This must stay the
  *  last rule so that every other rule takes precedence over it.
  */
.               { yylval.error_msg = yytext; return (ERROR); }

 /*
  * Keywords are case-insensitive except for the values true and false,
  * which must begin with a lower-case letter.
  */


 /*
  *  String constants (C syntax)
  *  Escape sequence \c is accepted for all characters c. Except for 
  *  \n \t \b \f, the result is c.
  *
  */


%%
Editor is loading...
Leave a Comment