(*
   Lexer
   15-411
   by Roland Flury
   Modified 2004 by Arthur O'Dwyer
*)
(* @version $Id: lexer.mll,v 1.3 2004/10/30 19:33:22 ajo Exp $ *)

{
 open Grammar               (* open the grammar for the tokens *)
 exception EXIT    (* This error is raised to exit the program *)

let lstart = Lexing.lexeme_start
let lend = Lexing.lexeme_end
let get = Lexing.lexeme

let error p1 p2 msg = (
  Errormsg.error (p1, p2) ("Parser says: "^msg);
  raise EXIT
)

(***********************************************************************)
(* Comment handling *)
(***********************************************************************)
let commentLevel = ref 0
let commentPos = ref 0
let linCom = ref false

let enterComment yypos =
  commentLevel := !commentLevel + 1;
  commentPos := yypos

let linComStart yypos =
  linCom := true;
  commentPos := yypos

let isLinCom () = !linCom
let linComEnd () = linCom := false

let exitComment () =
   commentLevel := !commentLevel - 1;
   !commentLevel = 0

let isCom () =
  !commentLevel > 0

(***********************************************************************)
(* CPP support *)
(***********************************************************************)
let fileOffset = ref 0

let extractFileName s =
  let su = String.sub s 1 (String.length s -1) in
  let pos = String.index su '"' in
  String.sub su 0 pos

(***********************************************************************)
(* Miscellaneous *)
(***********************************************************************)

(* Read end-of-file *)
let eof () =
  if (!commentLevel > 0) then
    begin
      error !commentPos !commentPos "Unterminated comment";
      raise EXIT
    end
  else
    ();
  EOF

}

let alpha  = ['a'-'z' 'A'-'Z' '_']
let digit  = ['0'-'9']
let id     = alpha(digit|alpha)*
let nl     = ('\r' | '\n' | "\r\n")
let fname = ('"')[^'\r' '\n']*('"')[^'\r' '\n']* nl


(***********************************************************************)
(* Parse the source-code / comments / cpp *)
(***********************************************************************)

rule token = parse

| eof                      { eof() }
| [' ''\t']+               { token lexbuf } (* ignore white space *)
| nl                       { Errormsg.startNewline (lstart lexbuf);
                             if isCom() then (
                               token lexbuf
                             )  else (
                               token lexbuf
                             )
                           }
(* Parentheses *)
| '{'                      { LBRACE }
| '}'                      { RBRACE }
| '('                      { LPAREN }
| ')'                      { RPAREN }
| '['                      { LBRACKET }
| ']'                      { RBRACKET }
(* Unary operators *)
| '-'                      { MINUS } (* icky! *)
| '!'                      { LOGNEGATE }
| '~'                      { BITNEGATE }
| '&'                      { BITAND } (* icky! *)
| '*'                      { TIMES }
(* Binary operators *)
| '+'                      { PLUS }
| '*'                      { TIMES } (* icky! *)
| '-'                      { MINUS }
| '/'                      { DIVIDE }
| '%'                      { MOD }
| '<'                      { RELLT }
| "<="                     { RELLE }
| '>'                      { RELGT }
| ">="                     { RELGE }
| "=="                     { RELEQ }
| "!="                     { RELNE }
| "&&"                     { LOGAND } (* foo *)
| "||"                     { LOGOR }
| "^^"                     { LOGXOR }
| '&'                      { BITAND }
| '|'                      { BITOR }
| '^'                      { BITXOR }
| "<<"                     { BITSHL }
| ">>"                     { BITSHR }
| "*+"                     { PTRPLUS }
| "*-"                     { PTRMINUS }
| "*=="                    { PTREQ }
| "*!="                    { PTRNE }
(* Assignments *)
| '='                      { ASSIGN }
| "+="                     { PLUSASSIGN }
| "*="                     { TIMESASSIGN }
| "-="                     { MINUSASSIGN }
| "/="                     { DIVIDEASSIGN }
| "%="                     { MODASSIGN }
(* Keywords *)
| "bool"                   { BOOL }
| "int"                    { INT }
| "true"                   { TRUE }
| "false"                  { FALSE }
| "NULL"                   { NULL }
| "offset"                 { OFFSET }
| "size"                   { SIZE }
| "alloc"                  { ALLOC }
| "var"                    { VAR }
| "struct"                 { STRUCT }
| "if"                     { IF }
| "else"                   { ELSE }
| "return"                 { RETURN }
(* Separators *)
| ';'                      { SEMICOL }
| ':'                      { COLON }
| ','                      { COMMA }
| '.'                      { DOT }
| "->"                     { ARROW }
(* chars & numbers *)
| digit+                   { INTCONST(Int32.of_string (get lexbuf)) }
| id                       { ID(get lexbuf) }
(* Comments *)
|  "//"                    { linComStart (lstart lexbuf);
                             comment lexbuf
                           }
|  "/*"                    { enterComment (lstart lexbuf);
                             comment lexbuf
                           }
|  "*/"                    { error (lstart lexbuf)
                                             (lend lexbuf-1)
                               "Unbalanced comment";
                             raise EXIT;
                             token lexbuf
                           }
| "#"                      { cpptoken lexbuf }
(* Default (error) *)
|  _                       { Errormsg.error (lstart lexbuf,
                                             lend lexbuf - 1)
                               ("illegal character '"
                                ^ get lexbuf
                                ^ "'");
                             raise EXIT;
                             token lexbuf
                           }

(***********************************************************************)
(* Parse comments *)
(***********************************************************************)

and comment = parse
|  "/*"
    {
     if not (isLinCom ()) then (
       enterComment (lstart lexbuf)
     ) else ();
     comment lexbuf }
|  "*/"
    {
     if not (isLinCom ()) && exitComment () then (
       token lexbuf
      ) else (
       comment lexbuf )
   }
|  nl
    {
     if isLinCom () then (
       linComEnd ()
      ) else ();
     Errormsg.startNewline (lstart lexbuf);
     if isCom() then (
       comment lexbuf
      )  else (
       token lexbuf )}
|  eof
    { eof() }
|  _
    { comment lexbuf }

(***********************************************************************)
(* Parse cpp annotations *)
(***********************************************************************)

and cpptoken = parse
| digit+
    { fileOffset := (Pervasives.int_of_string (get lexbuf));
      cpptoken lexbuf
    }
| fname
    { let fname = get lexbuf in
      Errormsg.startNewFile
        (lstart lexbuf + String.length fname - 1)
        (extractFileName fname)
        !fileOffset;
      token lexbuf
    }
| nl { token lexbuf }
| [' ''\t']+               { cpptoken lexbuf } (* ignore white space *)
| eof { eof() }
| _  { Errormsg.error (lstart lexbuf, lend lexbuf - 1)
         ("Illegal CPP instruction: " ^ (get lexbuf)); raise EXIT }