(* Lexer 15-411 by Roland Flury Modified 2004 by Arthur O'Dwyer *) (* @version $Id: lexer.mll,v 1.3 2004/10/30 19:33:22 ajo Exp $ *) { open Grammar (* open the grammar for the tokens *) exception EXIT (* This error is raised to exit the program *) let lstart = Lexing.lexeme_start let lend = Lexing.lexeme_end let get = Lexing.lexeme let error p1 p2 msg = ( Errormsg.error (p1, p2) ("Parser says: "^msg); raise EXIT ) (***********************************************************************) (* Comment handling *) (***********************************************************************) let commentLevel = ref 0 let commentPos = ref 0 let linCom = ref false let enterComment yypos = commentLevel := !commentLevel + 1; commentPos := yypos let linComStart yypos = linCom := true; commentPos := yypos let isLinCom () = !linCom let linComEnd () = linCom := false let exitComment () = commentLevel := !commentLevel - 1; !commentLevel = 0 let isCom () = !commentLevel > 0 (***********************************************************************) (* CPP support *) (***********************************************************************) let fileOffset = ref 0 let extractFileName s = let su = String.sub s 1 (String.length s -1) in let pos = String.index su '"' in String.sub su 0 pos (***********************************************************************) (* Miscellaneous *) (***********************************************************************) (* Read end-of-file *) let eof () = if (!commentLevel > 0) then begin error !commentPos !commentPos "Unterminated comment"; raise EXIT end else (); EOF } let alpha = ['a'-'z' 'A'-'Z' '_'] let digit = ['0'-'9'] let id = alpha(digit|alpha)* let nl = ('\r' | '\n' | "\r\n") let fname = ('"')[^'\r' '\n']*('"')[^'\r' '\n']* nl (***********************************************************************) (* Parse the source-code / comments / cpp *) (***********************************************************************) rule token = parse | eof { eof() } | [' ''\t']+ { token lexbuf } (* ignore white space *) | nl { Errormsg.startNewline (lstart lexbuf); if isCom() then ( token lexbuf ) else ( token lexbuf ) } (* Parentheses *) | '{' { LBRACE } | '}' { RBRACE } | '(' { LPAREN } | ')' { RPAREN } | '[' { LBRACKET } | ']' { RBRACKET } (* Unary operators *) | '-' { MINUS } (* icky! *) | '!' { LOGNEGATE } | '~' { BITNEGATE } | '&' { BITAND } (* icky! *) | '*' { TIMES } (* Binary operators *) | '+' { PLUS } | '*' { TIMES } (* icky! *) | '-' { MINUS } | '/' { DIVIDE } | '%' { MOD } | '<' { RELLT } | "<=" { RELLE } | '>' { RELGT } | ">=" { RELGE } | "==" { RELEQ } | "!=" { RELNE } | "&&" { LOGAND } (* foo *) | "||" { LOGOR } | "^^" { LOGXOR } | '&' { BITAND } | '|' { BITOR } | '^' { BITXOR } | "<<" { BITSHL } | ">>" { BITSHR } | "*+" { PTRPLUS } | "*-" { PTRMINUS } | "*==" { PTREQ } | "*!=" { PTRNE } (* Assignments *) | '=' { ASSIGN } | "+=" { PLUSASSIGN } | "*=" { TIMESASSIGN } | "-=" { MINUSASSIGN } | "/=" { DIVIDEASSIGN } | "%=" { MODASSIGN } (* Keywords *) | "bool" { BOOL } | "int" { INT } | "true" { TRUE } | "false" { FALSE } | "NULL" { NULL } | "offset" { OFFSET } | "size" { SIZE } | "alloc" { ALLOC } | "var" { VAR } | "struct" { STRUCT } | "if" { IF } | "else" { ELSE } | "return" { RETURN } (* Separators *) | ';' { SEMICOL } | ':' { COLON } | ',' { COMMA } | '.' { DOT } | "->" { ARROW } (* chars & numbers *) | digit+ { INTCONST(Int32.of_string (get lexbuf)) } | id { ID(get lexbuf) } (* Comments *) | "//" { linComStart (lstart lexbuf); comment lexbuf } | "/*" { enterComment (lstart lexbuf); comment lexbuf } | "*/" { error (lstart lexbuf) (lend lexbuf-1) "Unbalanced comment"; raise EXIT; token lexbuf } | "#" { cpptoken lexbuf } (* Default (error) *) | _ { Errormsg.error (lstart lexbuf, lend lexbuf - 1) ("illegal character '" ^ get lexbuf ^ "'"); raise EXIT; token lexbuf } (***********************************************************************) (* Parse comments *) (***********************************************************************) and comment = parse | "/*" { if not (isLinCom ()) then ( enterComment (lstart lexbuf) ) else (); comment lexbuf } | "*/" { if not (isLinCom ()) && exitComment () then ( token lexbuf ) else ( comment lexbuf ) } | nl { if isLinCom () then ( linComEnd () ) else (); Errormsg.startNewline (lstart lexbuf); if isCom() then ( comment lexbuf ) else ( token lexbuf )} | eof { eof() } | _ { comment lexbuf } (***********************************************************************) (* Parse cpp annotations *) (***********************************************************************) and cpptoken = parse | digit+ { fileOffset := (Pervasives.int_of_string (get lexbuf)); cpptoken lexbuf } | fname { let fname = get lexbuf in Errormsg.startNewFile (lstart lexbuf + String.length fname - 1) (extractFileName fname) !fileOffset; token lexbuf } | nl { token lexbuf } | [' ''\t']+ { cpptoken lexbuf } (* ignore white space *) | eof { eof() } | _ { Errormsg.error (lstart lexbuf, lend lexbuf - 1) ("Illegal CPP instruction: " ^ (get lexbuf)); raise EXIT }