package edu.berkeley.cs.db.yfilterplus.queryparser.xpathparser;

import java_cup.runtime.*;
import edu.berkeley.cs.db.yfilterplus.queryparser.xpathparser.sym;

/*
 * JFlex specification of the scanner for XPath queries.
 *
 * The generated class is named Lexer and hands java_cup.runtime.Symbol
 * tokens (token types taken from sym) to the CUP-generated parser.
 */

%%

%class Lexer
%unicode
%cup
%cupdebug
/* do we want this? */
%ignorecase
%line
%column
//%final /* ? */
//%pack /* ? */

%eofval{
  return symbol(sym.EOF);
%eofval}

%{
  /**
   * Builds a value-less token positioned at the current match.
   *
   * @param type token type constant from sym
   * @return a Symbol carrying yyline and yycolumn as its left/right positions
   */
  private Symbol symbol (int type) {
    return new Symbol(type, yyline, yycolumn);
  }

  /**
   * Builds a token with an attached value, positioned at the current match.
   *
   * @param type  token type constant from sym
   * @param value semantic value of the token (the rules below pass yytext())
   * @return a Symbol carrying yyline and yycolumn as its left/right positions
   */
  private Symbol symbol (int type, Object value) {
    return new Symbol(type, yyline, yycolumn, value);
  }

  /**
   * Prefixes an error description with the current scanner position.
   *
   * @param errorDescription text describing the problem
   * @return "line L, column C : description", using yyline and yycolumn as is
   */
  private String errorString (String errorDescription) {
    return ("line " + yyline + ", column " + yycolumn + " : " + errorDescription);
  }

  // accumulator used only by the disabled STRING state rules below
  private StringBuffer string = new StringBuffer();

  // when true, every token is echoed to System.err as it is recognized;
  // static, so the flag is shared by all Lexer instances
  static boolean debug = false;

  // input buffer for better error reporting
  protected StringBuffer inputBuffer = new StringBuffer();

  /**
   * Returns the text of all tokens read so far. Skipped whitespace is never
   * appended, so the tokens appear back to back.
   *
   * @return the live buffer itself (not a copy)
   */
  public StringBuffer getInputBuffer () {
    return inputBuffer;
  }

  /**
   * Sets the debug flag, e.g. from the command line.
   *
   * @param flag true to echo recognized tokens to System.err
   */
  public void setDebug (boolean flag) {
    debug = flag;
  }
%}

letter          = [a-zA-Z]
digit           = [0-9]
/* positive integers without leading zeros; a bare 0 is not matched */
number          = [1-9][0-9]*
/* double-quoted text without escapes; the quotes are part of the match */
literal         = ([\"] [^\"]* [\"])
ncnamechar      = {letter} | {digit} | "." | "-" | "_"
ncname          = ({letter} | "_" ) {ncnamechar}*
/* from JFlex manual */
line_terminator = \r | \n | \r\n
/* A macro has to be referenced in braces. The bare word used here before was
   taken as literal text, so the name "line_terminator" was skipped as if it
   were whitespace instead of being returned as an NCNAME. */
white_space     = {line_terminator} | [ \t\f]

%state STRING

%%

<YYINITIAL> {
  /* literals */
  /* \" {string.setLength(0); yybegin(STRING); } */

  /* whitespace, line terminators included: skipped and not recorded */
  {white_space}   { /* ignore */ }

  {number}        { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex: " + yytext());
                    return symbol(sym.NUMBER, yytext()); }

  {literal}       { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex: " + yytext());
                    return symbol(sym.LITERAL, yytext()); }

  /* operators and punctuation */
  "."             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:.");
                    return symbol(sym.DOT); }

  "="             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:=");
                    return symbol(sym.EQUALS); }

  "!="            { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:!=");
                    return symbol(sym.NOT_EQUALS); }

  "("             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:(");
                    return symbol(sym.L_PAREN); }

  ")"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:)");
                    return symbol(sym.R_PAREN); }

  "["             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:[");
                    return symbol(sym.L_BRACKET); }

  "]"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:]");
                    return symbol(sym.R_BRACKET); }

  ">"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:>");
                    return symbol(sym.GREATER_THAN); }

  "<"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:<");
                    return symbol(sym.LESS_THAN); }

  ">="            { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:>=");
                    return symbol(sym.GREATER_THAN_EQUALS); }

  "<="            { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:<=");
                    return symbol(sym.LESS_THAN_EQUALS); }

  "//"            { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex://");
                    return symbol(sym.DOUBLE_SLASH); }

  "/"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:/");
                    return symbol(sym.SLASH); }

  "@"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:@");
                    return symbol(sym.AT); }

  "*"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:*");
                    return symbol(sym.STAR); }

  /* listed before ncname so that a lone underscore yields UNDERSCORE */
  "_"             { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:_");
                    return symbol(sym.UNDERSCORE); }

  {ncname}        { inputBuffer.append(yytext());
                    if (debug) System.err.println("lex:" + yytext());
                    return symbol(sym.NCNAME, yytext()); }

  /* {letter}     {return symbol(sym.LETTER, yytext()); } */
  /* {ncnamechar} {return symbol(sym.NCNAMECHAR, yytext()); } */
}

/* disabled: literal scanning with escape sequences via the STRING state
<STRING> {
  \"              { yybegin(YYINITIAL);
                    inputBuffer.append(yytext());
                    if (debug) System.err.println("lex: " + string.toString() );
                    return symbol(sym.LITERAL, string.toString()); }
  [^\n\r\"\\]+    { string.append( yytext() ); }
  \\t             { string.append('\t'); }
  \\n             { string.append('\n'); }
  \\r             { string.append('\r'); }
  \\\"            { string.append('\"'); }
  \\              { string.append('\\'); }
}
*/

/* error fallback */
/* .|\n           { throw new Error("Illegal character <"+yytext()+">"); } */