/* Lexical analyzer for Titanium */

/* Author: Paul N. Hilfinger */

/* Copyright (C) 1995, 1997, 2003, 2004 by the Regents of the University
 * of California.  All rights reserved. */

%{
/* NOTE(review): the names of the six system headers were missing from the
 * copy of this file under review.  The list below was reconstructed from
 * the library identifiers the scanner uses (assert; isspace, isdigit,
 * toupper, isgraph; FILE, sprintf; atoi, exit; strncmp, strchr; map).
 * Confirm it against the original before committing. */
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <map>
#include "AST.h"
#include "errors.h"
#include "lex-string.h"
#include "parse.h"
#include "tokens.h"
#include "parser.h"

/* Current position in the source file.  You must maintain this. */
static SourcePosn srcPosn;

/* True when Titanium keywords are to be recognized; set by parse(). */
static bool titaniumSource;

extern CompileUnitNode_loadinfo_t current_CompileUnitNode_info;

/* The undefined position.  Used as the position of nodes that */
/* have no meaningful position in the source. */
const SourcePosn NoSourcePosition = { NULL, 0 };

/* Store the current position in the token's semantic value, then move the
 * current position past the text just matched. */
static inline void setPosn()
{
  yylval.Terminal.posn = srcPosn;
  srcPosn.posn += yyleng;
}

/* Position last reached in each file, keyed by file name.  The line
 * directive rule saves the current position here before switching files
 * and restores it when a file is seen again.
 * NOTE(review): the template arguments were missing from the copy under
 * review; <string, SourcePosn> is what the uses of this map require. */
static map<string, SourcePosn> filepos_map;

/* Key under which the current file's position is saved in filepos_map. */
static string curfile;

/* Return S with every leading "./" removed. */
string stripCWDprefix(string s)
{
  /* compare() looks only at the first two characters, whereas
     find("./") == 0 scans the whole string when there is no such prefix. */
  while (s.compare(0, 2, "./") == 0)
    s.erase(0, 2);
  return s;
}

/* Scan and parse the stream FP.
 *   name          file name recorded in source positions (leading "./"
 *                 prefixes are stripped first)
 *   tmpname       initial value of curfile, i.e. the key under which this
 *                 file's position is saved when a line directive switches
 *                 to another file
 *   titaniumFile  true to recognize the additional Titanium keywords,
 *                 false to treat them as ordinary identifiers
 * Resets the saved file positions and starts at position 1. */
void parse(FILE *fp, string name, string tmpname, bool titaniumFile)
{
  yyin = fp;
  filepos_map.clear();
  srcPosn.file = new File(stripCWDprefix(name));
  srcPosn.posn = 1;
  curfile = tmpname;
  titaniumSource = titaniumFile;
  yyparse();
}

static void noteEmbeddedNewlines();

// Return titanium keyword if parsing titanium code, normal identifier otherwise
static int tikw(int kw)
{
  if (titaniumSource)
    return kw;
  // Java, handle as identifier
  yylval.Terminal.val = intern(yytext);
  return IDENTIFIER;
}
%}

HEX [0-9A-Fa-f]
DEC [0-9]
OCT [0-7]
Letter [a-zA-Z_$]
AlphaNum ({Letter}|{DEC})
UnicodeChar (\\u+{HEX}{HEX}{HEX}{HEX})
LiteralChar (\\([btnfr"'\\]|{OCT}{1,2}|[0-3]{OCT}{2})|[^'"\\\n]|{UnicodeChar})
C_StyleComment ("/*"([^*]|"*"+[^*/])*"*"+"/")
CPlusPlusStyleComment ("//".*"\n")
PreprocessorDirective ("#".*"\n")

%option noyywrap
%option nodefault
%option nounput

%%

  /* Whitespace */
  /* Blanks only advance the position; a newline additionally records a
     line end at its own position. */
[ \f\t\x0D]+ { srcPosn.posn += yyleng; }
\n { srcPosn.file->defineLineEnd(srcPosn.posn); srcPosn.posn += 1; }

  /* Comments (Section 1.4) */
  /* Traditional and Doc comments */
{C_StyleComment} {
    /* Record the line ends inside the comment before advancing:
       noteEmbeddedNewlines() measures from the current position. */
    noteEmbeddedNewlines();
    srcPosn.posn += yyleng;
}

  /* C++-style comment */
{CPlusPlusStyleComment} {
    /* The match ends with the newline, so the line end is the last
       character consumed. */
    srcPosn.posn += yyleng;
    srcPosn.file->defineLineEnd(srcPosn.posn-1);
}

  /* line directive from the preprocessor looks like:
       '#[line] [0-9]+ ("filename") [ignored junk]\n'
     pragma looks like:
       '#pragma TI arguments...\n'
   */
{PreprocessorDirective} {
    /* The <ctype.h> functions are only defined for values representable
       as unsigned char (or EOF), so every character is cast before being
       classified. */
    char *p = yytext;
    assert(*p == '#');
    p++;
    while (*p && isspace((unsigned char) *p)) p++;

    if (!strncmp(p,"pragma ",7)) {
      /* Step over "pragma"; the blank after it is skipped with any
         further white space. */
      p += 6;
      while (*p && isspace((unsigned char) *p)) p++;
      if (toupper((unsigned char) p[0]) == 'T' &&
          toupper((unsigned char) p[1]) == 'I' &&
          isspace(toupper((unsigned char) p[2]))) {
        p += 2;
        while (*p && isspace((unsigned char) *p)) p++;
        if (!strncmp(p, "nobcheck", 8)) {
          current_CompileUnitNode_info.pragma.force_nobcheck = 1;
          YY_BREAK;
        } else if (!strncmp(p, "nosrcpos", 8)) {
          current_CompileUnitNode_info.pragma.no_srcpos = 1;
          YY_BREAK;
        } else {
          Error(srcPosn) << "unrecognized #pragma Ti " << p << endl;
          YY_BREAK;
        }
      } else {
        /* ignore other "#pragma"s, which are inserted by some
           preprocessors (e.g. gcc on darwin) */
        YY_BREAK;
      }
    }

    /* Line directive: find and read the line number. */
    while (*p && !isdigit((unsigned char) *p)) p += 1;
    int linenum = atoi(p);
    if (linenum <= 0) {
      cerr << "Unrecognized line directive:\n" << yytext << endl;
      exit(1);
    }
    while (*p && isdigit((unsigned char) *p)) p += 1;

    /* Look for the opening quote of the file name. */
    while (*p && *p != '\"') p += 1;
    if (*p != '\"') {
      /* xlc omits the filename if it hasn't changed */
      while ((int) srcPosn.posnToLineNumber() < linenum)
        srcPosn.file->defineLineEnd(srcPosn.posn++);
    } else {
      p += 1;
      char *begin = p;
      while (*p != '\0' && *p != '\"') p += 1;
      if (*p != '\"') {
        cerr << "Unrecognized line directive:\n" << yytext << endl;
        exit(1);
      }
      /* Build the name straight from the quoted span.  Copying it into a
         fixed-size array first would overflow for long path names. */
      string newname = stripCWDprefix(string(begin, p - begin));

      filepos_map[curfile] = srcPosn; // store old file pos
      if (filepos_map.count(newname)) { // saw it before, restore
        srcPosn = filepos_map[newname];
      } else { // new file
        srcPosn.file = new File(newname);
        srcPosn.posn = 1;
      }
      /* Define empty lines until the position is on the requested line. */
      while ((int)srcPosn.posnToLineNumber() < linenum)
        srcPosn.file->defineLineEnd(srcPosn.posn++);
      curfile = newname;
    }
}

  /* Keywords (Section 1.5) */
"abstract"      { setPosn(); return ABSTRACT; }
"assert"        { setPosn(); return ASSERT; }
"boolean"       { setPosn(); return BOOLEAN; }
"break"         { setPosn(); return BREAK; }
"byte"          { setPosn(); return BYTE; }
"case"          { setPosn(); return CASE; }
"catch"         { setPosn(); return CATCH; }
"char"          { setPosn(); return CHAR; }
"class"         { setPosn(); return CLASS; }
"continue"      { setPosn(); return CONTINUE; }
"default"       { setPosn(); return DEFAULT; }
"do"            { setPosn(); return DO; }
"double"        { setPosn(); return DOUBLE; }
"else"          { setPosn(); return ELSE; }
"extends"       { setPosn(); return EXTENDS; }
"final"         { setPosn(); return FINAL; }
"finally"       { setPosn(); return FINALLY; }
"float"         { setPosn(); return FLOAT; }
"for"           { setPosn(); return FOR; }
"if"            { setPosn(); return IF; }
"implements"    { setPosn(); return IMPLEMENTS; }
"import"        { setPosn(); return IMPORT; }
"instanceof"    { setPosn(); return INSTANCEOF; }
"int"           { setPosn(); return INT; }
"interface"     { setPosn(); return INTERFACE; }
"long"          { setPosn(); return LONG; }
"native"        { setPosn(); return NATIVE; }
"new"           { setPosn(); return NEW; }
"null"          { setPosn(); return NULL_VAL; }
"package"       { setPosn(); return _PACKAGE; }
"private"       { setPosn(); return PRIVATE; }
"protected"     { setPosn(); return PROTECTED; }
"public"        { setPosn(); return PUBLIC; }
"return"        { setPosn(); return RETURN; }
"short"         { setPosn(); return SHORT; }
"static"        { setPosn(); return STATIC; }
"strictfp"      { setPosn(); return STRICTFP; }
"super"         { setPosn(); return SUPER; }
"switch"        { setPosn(); return SWITCH; }
"synchronized"  { setPosn(); return SYNCHRONIZED; }
"this"          { setPosn(); return THIS; }
"throw"         { setPosn(); return THROW; }
"throws"        { setPosn(); return THROWS; }
"transient"     { setPosn(); return TRANSIENT; }
"try"           { setPosn(); return TRY; }
"void"          { setPosn(); return VOID; }
"volatile"      { setPosn(); return VOLATILE; }
"while"         { setPosn(); return WHILE; }

  /* Additional Titanium keywords */
  /* tikw() yields the keyword only for Titanium source and IDENTIFIER
     otherwise. */
"foreach"       { setPosn(); return tikw(FOREACH); }
"single"        { setPosn(); return tikw(SINGLE); }
"sglobal"       { setPosn(); return tikw(SGLOBAL); }
"partition"     { setPosn(); return tikw(PARTITION); }
"op"            { setPosn(); return tikw(OPERATOR); }
"local"         { setPosn(); return tikw(LOCAL); }
"immutable"     { setPosn(); return tikw(IMMUTABLE); }
"inline"        { setPosn(); return tikw(INLINE); }
"overlap"       { setPosn(); return tikw(OVERLAP); }
"broadcast"     { setPosn(); return tikw(BROADCAST); }
"template"      { setPosn(); return tikw(TEMPLATE); }
"nonshared"     { setPosn(); return tikw(NONSHARED); }
"polyshared"    { setPosn(); return tikw(POLYSHARED); }

  /* Keywords that are reserved (illegal as identifiers), but not used. */
"const" |
"goto" {
    Error(srcPosn) << "reserved keyword " << yytext << " ignored" << endl;
    /* No token is returned, but the text has been consumed: advance the
       position so that everything after the ignored word is still
       reported at the right place. */
    srcPosn.posn += yyleng;
}

  /* Boolean literals (Section 1.7.3) */
"true"          { setPosn(); return TRUE_LITERAL; }
"false"         { setPosn(); return FALSE_LITERAL; }

  /* Identifiers (Section 1.6) */
{Letter}{AlphaNum}* {
    setPosn();
    yylval.Terminal.val = intern (yytext);
    return IDENTIFIER;
}

  /* Integer literals (Section 1.7.1) */
[1-9]{DEC}* {
    setPosn();
    yylval.Terminal.val = new string (yytext);
    return INT_LITERAL;
}
0{OCT}* {
    setPosn();
    yylval.Terminal.val = new string (yytext);
    return INT_LITERAL;
}
0[xX]{HEX}+ {
    setPosn();
    yylval.Terminal.val = new string (yytext);
    return INT_LITERAL;
}

  /* Long literals: the value omits the trailing l/L suffix. */
[1-9]{DEC}*[lL] {
    setPosn();
    yylval.Terminal.val = new string (yytext, yyleng-1);
    return LONG_LITERAL;
}
0{OCT}*[lL] {
    setPosn();
    yylval.Terminal.val = new string (yytext, yyleng-1);
    return LONG_LITERAL;
}
0[xX]{HEX}+[lL] {
    setPosn();
    yylval.Terminal.val = new string (yytext, yyleng-1);
    return LONG_LITERAL;
}

  /* Floating-Point literals (Section 1.7.2) */
  /* Note: The 10/30/95 version allows, e.g., "1.0e".  I'm going to assume */
  /* that is wrong.  Is it? */
{DEC}+"."{DEC}*([eE][-+]?{DEC}+)?[dD]? |
"."{DEC}+([eE][-+]?{DEC}+)?[dD]? |
{DEC}+[dD] |
{DEC}+([eE][-+]?{DEC}+)[dD]? {
    setPosn();
    yylval.Terminal.val = new string (yytext);
    return DOUBLE_LITERAL;
}

{DEC}+"."{DEC}*([eE][-+]?{DEC}+)?[fF] |
"."{DEC}+([eE][-+]?{DEC}+)?[fF] |
{DEC}+([eE][-+]?{DEC}+)[fF] |
{DEC}+[fF] {
    setPosn();
    yylval.Terminal.val = new string (yytext);
    return FLOAT_LITERAL;
}

  /* Character literals (Section 1.7.4) */
"'"({LiteralChar}|"\"")"'" {
    setPosn();
    /* Convert the text between the quotes. */
    yylval.CharTerminal.val = convertCharacter(yytext+1, yyleng-2, srcPosn);
    return CHARACTER_LITERAL;
}

  /* Anything else that starts with a quote: report it and hand back a
     placeholder literal. */
"'"[^'\n]*"'"? {
    Error (srcPosn) << "invalid character literal" << endl;
    setPosn();
    yylval.CharTerminal.val = '?';
    return CHARACTER_LITERAL;
}

  /* String literals (Section 1.7.5) */
"\""({LiteralChar}|"'")*"\"" {
    setPosn();
    /* Convert the text between the quotes. */
    yylval.StrTerminal.val =
      new string16( convertString(yytext+1, yyleng-2, srcPosn) );
    return STRING_LITERAL;
}

  /* Anything else that starts with a double quote: report it and hand
     back an empty string literal. */
"\""[^"\n]*"\""? {
    Error (srcPosn) << "invalid string literal" << endl;
    setPosn();
    yylval.StrTerminal.val = new string16;
    return STRING_LITERAL;
}

  /* Separators (Section 1.8) */
[(){}\[\];,.] { setPosn(); return yytext[0]; }

  /* Operators (Section 1.9) */
  /* NOTE(review): the next rule is reproduced exactly as it appears in the
     copy under review, where it looks truncated: the character class is
     never closed and the text between it and the GE rule is missing
     (presumably the single-character operator rule and several
     two-character operator rules).  Restore it from the original source
     before running flex. */
[-=>=" { setPosn(); return GE; }
"<<"            { setPosn(); return LSHIFTL; }
">>"            { setPosn(); return ASHIFTR; }
">>>"           { setPosn(); return LSHIFTR; }
"+="            { setPosn(); return PLUS_ASG; }
"-="            { setPosn(); return MINUS_ASG; }
"*="            { setPosn(); return MULT_ASG; }
"/="            { setPosn(); return DIV_ASG; }
"%="            { setPosn(); return REM_ASG; }
"<<="           { setPosn(); return LSHIFTL_ASG; }
">>="           { setPosn(); return ASHIFTR_ASG; }
">>>="          { setPosn(); return LSHIFTR_ASG; }
"&="            { setPosn(); return AND_ASG; }
"^="            { setPosn(); return XOR_ASG; }
"|="            { setPosn(); return OR_ASG; }
"++"            { setPosn(); return PLUSPLUS; }
"--"            { setPosn(); return MINUSMINUS; }

  /* Titanium */
"=>"            { setPosn(); return GUARDS; }

  /* Errors */
. {
    /* Work on the byte as an unsigned value: isgraph() is undefined for
       negative arguments, and a negative char passed to "%04x" would be
       sign-extended to eight hex digits and overrun str. */
    unsigned char ch = (unsigned char) yytext[0];
    if (isgraph (ch)) {
      Error (srcPosn) << "unrecognized character: `" << yytext[0] << "'" << endl;
    } else {
      char str[5];   /* ch <= 0xff, so four hex digits plus the NUL fit */
      sprintf(str, "%04x", (unsigned) ch);
      Error (srcPosn) << "unrecognized character: `" << str << "'" << endl;
    }
    setPosn();
}

%%

/* The lexer's current position (a kludge). */
SourcePosn lexerPosition()
{
  return srcPosn;
}

/* Record a line end for every newline inside the text just matched.
 * Offsets are measured from the current position, so this must be called
 * before the position is advanced past the match. */
static void noteEmbeddedNewlines()
{
  for (char *nl = strchr(yytext, '\n'); nl != NULL; nl = strchr(nl + 1, '\n'))
    srcPosn.file->defineLineEnd(srcPosn.posn + (nl - yytext));
}