/*
 * JavaScanner.java is a generated file. You should edit JavaScanner.lex
 * and use JFlex to generate JavaScanner.java.
 * <a href=http://jflex.de>JFlex</a> is available at
 * <a href=http://jflex.de><em>http://jflex.de/</em></a>.
 */
 
/*
 * This file is loosely based on Stephen Ostermiller's
 * <a href=http://ostermiller.org/syntax/>syntax highlighting package</a>
 * at <a href=http://ostermiller.org/syntax/><em>http://ostermiller.org/syntax/</em></a>.
 * My changes may well have introduced errors that Ostermiller did not
 * have. -Carl Burch 
 */
 
/**
 * Implements a scanner for retrieving Java tokens from a
 * stream.
 *
 * @author Carl Burch
 * @version 0.1 2005-05-31
 */

package com.cburch.editor.scanners;

import java.io.Reader;
import java.io.StringReader;

import javax.swing.text.Document;
import javax.swing.text.Position;
import javax.swing.text.BadLocationException;

import com.cburch.editor.Token;
import com.cburch.editor.Scanner;
import com.cburch.editor.util.DocumentReader;

%%

%public
%class JavaScanner
%implements Scanner<Token<JavaTokenType>>
%function nextToken
%type JavaToken

%unicode
%char

%{
	/**
	 * Non-generic token class used as the declared return type of
	 * <code>nextToken</code>. JFlex cannot emit a generic return
	 * type for the scanning method under Java 1.5, so this subclass
	 * pins the type parameter of <code>Token</code> to
	 * <code>JavaTokenType</code>. Both constructors simply forward
	 * their arguments to the superclass.
	 */
	private static class JavaToken extends Token<JavaTokenType> {
		/** Creates a token that carries no associated data. */
		public JavaToken(JavaTokenType type, Position offset, String text) {
			super(type, offset, text);
		}

		/** Creates a token that carries an associated data object. */
		public JavaToken(JavaTokenType type, Position offset, String text, Object data) {
			super(type, offset, text, data);
		}
	}

	/**
	 * The document behind the reader most recently passed to
	 * <code>reset</code>, or <code>null</code> when that reader was
	 * not a <code>DocumentReader</code> (or no reset has happened yet).
	 */
	private Document document;
	
	/**
	 * Constructs a JavaScanner reading from an empty string. This
	 * placeholder input is not meant to be scanned; the tokenizer is
	 * expected to initialize the reader through a <code>reset</code>
	 * method before ever reading any tokens.
	 */
	public JavaScanner() {
		// Delegates to the Reader-taking constructor (presumably the one
		// JFlex generates for this class) with an empty input.
		this(new StringReader(""));
	}

    /**
     * Restarts scanning on a new reader, continuing the character
     * count from the end of a previously scanned token.
     *
     * @param reader  the new input; when it is a
     *   <code>DocumentReader</code> its document is remembered so that
     *   tokens can be given document positions.
     * @param token  the token whose end offset becomes the scanner's
     *   current character offset.
     */
    public void reset(Reader reader, Token<JavaTokenType> token) {
    	document = (reader instanceof DocumentReader)
    		? ((DocumentReader) reader).getDocument()
    		: null;
    	yyreset(reader);
    	// Set after yyreset so the offset assigned here is the one in effect.
    	this.yychar = token.getEndOffset();
    }
	
	/**
	 * Restarts scanning on a new reader.
	 *
	 * @param reader  the new input; when it is a
	 *   <code>DocumentReader</code> its document is remembered so that
	 *   tokens can be given document positions.
	 */
	public void reset(Reader reader) {
		document = (reader instanceof DocumentReader)
			? ((DocumentReader) reader).getDocument()
			: null;
		yyreset(reader);
	}

	/**
	 * Builds a token of the given type from the text just matched.
	 * When a document is known, the token is anchored at a document
	 * position created for the offset just past the matched text;
	 * otherwise its position is <code>null</code>.
	 *
	 * @param type  the type to associate with the created token.
	 * @return the created token.
	 */
    private JavaToken token(JavaTokenType type) {
    	String matched = yytext();
    	Position end = null;
    	if(document != null) {
    		try {
    			end = document.createPosition(yychar + matched.length());
    		} catch(BadLocationException ignored) {
    			// best effort: the token is created without a position
    		}
    	}
    	return new JavaToken(type, end, matched);
    }
    
	/**
	 * Builds a token of the given type, carrying the given data, from
	 * the text just matched. When a document is known, the token is
	 * anchored at a document position created for the offset just past
	 * the matched text; otherwise its position is <code>null</code>.
	 *
	 * @param type  the type to associate with the created token.
	 * @param data  the data to associate with the created token.
	 * @return the created token.
	 */
    private JavaToken token(JavaTokenType type, Object data) {
    	String matched = yytext();
    	Position end = null;
    	if(document != null) {
    		try {
    			end = document.createPosition(yychar + matched.length());
    		} catch(BadLocationException ignored) {
    			// best effort: the token is created without a position
    		}
    	}
    	return new JavaToken(type, end, matched, data);
    }
    
    /**
     * Builds an <code>ERROR</code> token from the text just matched and
     * attaches the given message to it.
     *
     * @param message  the error message to associate with the token.
     * @return the created token
     */
    private JavaToken errorToken(String message) {
    	JavaToken err = token(JavaTokenType.ERROR);
    	err.setErrorMessage(message);
    	return err;
    }

	/**
	 * Determines the actual string represented by the text between a
	 * literal's quotation marks. This is a matter of replacing each
	 * escape sequence with its equivalent character ("\t" gets
	 * replaced with a tab character, for example).
	 *
	 * <p>Recognized escapes are <code>\b \t \n \f \r \" \' \\</code>,
	 * octal escapes of one to three digits (three digits only when the
	 * first is 0 through 3), and <code>\uXXXX</code> with exactly four
	 * hexadecimal digits. Java proper translates Unicode escapes in a
	 * stage before tokenizing; this scanner only decodes the ones that
	 * appear inside character and string literals.
	 *
	 * @param source  the original string, with escape characters.
	 * @return the string it represents, or <code>null</code> if it
	 *   has any errors (an unknown or truncated escape sequence).
	 */
    private String unescapeString(String source) {
    	int n = source.length();
    	StringBuilder ret = new StringBuilder(n);
    	for(int i = 0; i < n; i++) {
    		char c = source.charAt(i);
    		if(c != '\\') {
    			ret.append(c);
    			continue;
    		}

    		i++;
    		if(i == n) return null; // a backslash with nothing after it
    		c = source.charAt(i);
    		switch(c) {
    		case 'b':  ret.append('\b'); break;
    		case 't':  ret.append('\t'); break;
    		case 'n':  ret.append('\n'); break;
    		case 'f':  ret.append('\f'); break;
    		case 'r':  ret.append('\r'); break;
    		case '"':  ret.append('"');  break;
    		case '\'': ret.append('\''); break;
    		case '\\': ret.append('\\'); break; // an escaped backslash is a backslash
    		case 'u': {
    			// exactly four hexadecimal digits must follow the 'u'
    			if(i + 4 >= n) return null;
    			int code = 0;
    			for(int k = 1; k <= 4; k++) {
    				int d = Character.digit(source.charAt(i + k), 16);
    				if(d < 0) return null;
    				code = code * 16 + d;
    			}
    			i += 4;
    			ret.append((char) code);
    			break;
    		}
    		default: { // it must be an octal sequence
    			int o0 = Character.digit(c, 8);
    			if(o0 < 0) return null; // this is an invalid escape sequence

    			int value = o0;
    			if(i + 1 < n && Character.digit(source.charAt(i + 1), 8) >= 0) {
    				i++; // this octal sequence has at least two digits
    				value = value * 8 + Character.digit(source.charAt(i), 8);
    				// a third digit is allowed only if the value stays below 0400
    				if(o0 < 4 && i + 1 < n && Character.digit(source.charAt(i + 1), 8) >= 0) {
    					i++; // this octal sequence has three digits
    					value = value * 8 + Character.digit(source.charAt(i), 8);
    				}
    			}
    			ret.append((char) value);
    		}
    		}
    	}
    	return ret.toString();
    }
%}

// Basic character classes shared by the token macros below.
HexDigit=([:digit:]|[a-fA-F])
Digit=([:digit:])
OctalDigit=([0-7])
// TetraDigit: a legal first digit of a three-digit octal escape.
TetraDigit=([0-3])
NonZeroDigit=([1-9])
BLANK=([ ])
TAB=([\t])
FF=([\f])
// EscChar: a single backslash.
EscChar=([\\])
CR=([\r])
LF=([\n])
// EOL: a carriage return, a line feed, or the two together.
EOL=({CR}|{LF}|{CR}{LF})
WhiteSpace=({BLANK}|{TAB}|{FF}|{EOL})
// AnyNonSeparator: any character that is not whitespace, not one of the
// punctuation or operator characters listed, and not a quotation mark.
AnyNonSeparator=([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\'])

// Octal escapes: a backslash followed by one, two, or three octal digits;
// the three-digit form requires its first digit to be 0 through 3.
OctEscape1=({EscChar}{OctalDigit})
OctEscape2=({EscChar}{OctalDigit}{OctalDigit})
OctEscape3=({EscChar}{TetraDigit}{OctalDigit}{OctalDigit})
OctEscape=({OctEscape1}|{OctEscape2}|{OctEscape3})

// Unicode escape: backslash, a single 'u', then exactly four hex digits.
UnicodeEscape=({EscChar}[u]{HexDigit}{HexDigit}{HexDigit}{HexDigit})

// Escape: the single-character escapes r, n, b, f, t, backslash, and the
// two quotation marks.
Escape=({EscChar}([r]|[n]|[b]|[f]|[t]|[\\]|[\']|[\"]))
Identifier=([:jletter:][:jletterdigit:]*)
// ErrorIdentifier: any run of non-separator characters; it is used by the
// last rule of the specification as a catch-all for invalid identifiers.
ErrorIdentifier=({AnyNonSeparator}+)

// Comment: two slashes and everything up to (not including) the line end.
Comment=("//"[^\r\n]*)
TradCommentBegin=("/*")
DocCommentBegin =("/**")
// NonTermStars: text free of stars and slashes, then one or more stars,
// then one character that is neither a star nor a slash, so these stars
// do not close the comment.
NonTermStars=([^\*\/]*[\*]+[^\*\/])
// TermStars: one or more stars followed by the closing slash.
TermStars=([\*]+[\/])
// CommentText: any number of pieces, each either star-free text ending
// in a slash or a NonTermStars run.
CommentText=((([^\*]*[\/])|{NonTermStars})*)
// CommentEnd: star-free text followed by the terminating stars and slash.
CommentEnd=([^\*]*{TermStars})
TradComment=({TradCommentBegin}{CommentText}{CommentEnd})
// The two DocCommentEnd forms differ in what follows the doc-comment
// opener: one character that is neither slash nor star, or a NonTermStars run.
DocCommentEnd1=([^\/\*]{CommentText}{CommentEnd})
DocCommentEnd2=({NonTermStars}{CommentText}{CommentEnd})
// DocComment also accepts the opener followed directly by TermStars or by
// a lone slash.
DocComment=({DocCommentBegin}({DocCommentEnd1}|{DocCommentEnd2}|{TermStars}|[\/]))
// OpenComment: a comment opener followed by comment text, star-free text,
// and trailing stars, with no closing slash required; the rule that uses it
// reports an unclosed comment.
OpenComment=({TradCommentBegin}{CommentText}([^\*]*)([\*]*))

// Integer literal shapes. DecimalNum already carries the optional sign, so
// DecimalLong must not add a second one (otherwise "+-5L" or "--5L" would be
// swallowed as a single token). Octal and hex forms take no sign.
Sign=([\+\-])
LongSuffix=([lL])
DecimalNum=({Sign}?(0|{NonZeroDigit}{Digit}*))
OctalNum=(0{OctalDigit}*)
HexNum=(0[xX]{HexDigit}{HexDigit}*)
DecimalLong=({DecimalNum}{LongSuffix})
OctalLong=({OctalNum}{LongSuffix})
HexLong=({HexNum}{LongSuffix})

// Exponent: 'e' or 'E' followed by an optionally signed run of digits.
SignedInt=({Sign}?{Digit}+)
Expo=([eE])
ExponentPart=({Expo}{SignedInt})
FloatSuffix=([fF])
DoubleSuffix=([dD])
// The four unsuffixed shapes: digits with a point ("1." or "1.5"), a
// leading point (".5"), digits with a mandatory exponent ("1e5"), and
// bare digits ("15").
FloatDouble1=({Digit}+\.{Digit}*{ExponentPart}?)
FloatDouble2=(\.{Digit}+{ExponentPart}?)
FloatDouble3=({Digit}+{ExponentPart})
FloatDouble4=({Digit}+)
// A double may omit its suffix, except for the bare-digits shape, where
// the suffix is required.
Double1=({FloatDouble1}{DoubleSuffix}?)
Double2=({FloatDouble2}{DoubleSuffix}?)
Double3=({FloatDouble3}{DoubleSuffix}?)
Double4=({FloatDouble4}{DoubleSuffix})
// A float always requires its suffix.
Float1=({FloatDouble1}{FloatSuffix})
Float2=({FloatDouble2}{FloatSuffix})
Float3=({FloatDouble3}{FloatSuffix})
Float4=({FloatDouble4}{FloatSuffix})
Float=({Float1}|{Float2}|{Float3}|{Float4})
Double=({Double1}|{Double2}|{Double3}|{Double4}) 

// The same four shapes as the FloatDouble macros, restricted to literals
// whose digits are all zeros. The rules match these before the general
// Float and Double rules, which report a value of zero as an error.
ZeroFloatDouble1=(0+\.0*{ExponentPart}?)
ZeroFloatDouble2=(\.0+{ExponentPart}?)
ZeroFloatDouble3=(0+{ExponentPart})
ZeroFloatDouble4=(0+)
ZeroDouble1=({ZeroFloatDouble1}{DoubleSuffix}?)
ZeroDouble2=({ZeroFloatDouble2}{DoubleSuffix}?)
ZeroDouble3=({ZeroFloatDouble3}{DoubleSuffix}?)
ZeroDouble4=({ZeroFloatDouble4}{DoubleSuffix})
ZeroFloat1=({ZeroFloatDouble1}{FloatSuffix})
ZeroFloat2=({ZeroFloatDouble2}{FloatSuffix})
ZeroFloat3=({ZeroFloatDouble3}{FloatSuffix})
ZeroFloat4=({ZeroFloatDouble4}{FloatSuffix})
ZeroFloat=({ZeroFloat1}|{ZeroFloat2}|{ZeroFloat3}|{ZeroFloat4})
ZeroDouble=({ZeroDouble1}|{ZeroDouble2}|{ZeroDouble3}|{ZeroDouble4})

// ErrorFloat: a digit followed by any run of non-separator characters and
// periods; used by a later rule to flag malformed numeric text.
ErrorFloat=({Digit}({AnyNonSeparator}|\.)*)

// AnyChrChr: any character other than an apostrophe, a line terminator
// character, or a backslash.
AnyChrChr=([^\'\n\r\\])
// A well-formed character literal holds exactly one escape or plain character.
UnclosedCharacter=(\'({Escape}|{OctEscape}|{UnicodeEscape}|{AnyChrChr}))
Character=({UnclosedCharacter}\')
// Malformed forms: an apostrophe followed by any number of plain characters
// or backslash-plus-character pairs, optionally closed; two adjacent
// apostrophes also count as a malformed character.
MalformedUnclosedCharacter=(\'({AnyChrChr}|({EscChar}[^\n\r]))*)
MalformedCharacter=(\'\'|{MalformedUnclosedCharacter}\')

// AnyStrChr: any character other than a double quote, a line terminator
// character, or a backslash.
AnyStrChr=([^\"\n\r\\])
UnclosedString=(\"({Escape}|{OctEscape}|{UnicodeEscape}|{AnyStrChr})*)
String=({UnclosedString}\")
// Malformed forms accept a bare backslash anywhere, so unknown escape
// sequences are still gathered into one token.
MalformedUnclosedString=(\"({EscChar}|{AnyStrChr})*)
MalformedString=({MalformedUnclosedString}\")

%%

/* Separators: each single character maps directly to its token type. */
<YYINITIAL> "(" { return token(JavaTokenType.LPAREN); }
<YYINITIAL> ")" { return token(JavaTokenType.RPAREN); }
<YYINITIAL> "{" { return token(JavaTokenType.LBRACE); }
<YYINITIAL> "}" { return token(JavaTokenType.RBRACE); }
<YYINITIAL> "[" { return token(JavaTokenType.LBRACKET); }
<YYINITIAL> "]" { return token(JavaTokenType.RBRACKET); }
<YYINITIAL> ";" { return token(JavaTokenType.SEMICOLON); }
<YYINITIAL> "," { return token(JavaTokenType.COMMA); }
<YYINITIAL> "." { return token(JavaTokenType.PERIOD); }

/* Single-character operators. */
<YYINITIAL> "=" { return token(JavaTokenType.ASSIGN); }
<YYINITIAL> ">" { return token(JavaTokenType.RANGLE); }
<YYINITIAL> "<" { return token(JavaTokenType.LANGLE); }
<YYINITIAL> "!" { return token(JavaTokenType.EXCLAMATION); }
<YYINITIAL> "~" { return token(JavaTokenType.TILDE); }
<YYINITIAL> "?" { return token(JavaTokenType.QUESTION); }
<YYINITIAL> ":" { return token(JavaTokenType.COLON); }
<YYINITIAL> "+" { return token(JavaTokenType.PLUS); }
<YYINITIAL> "-" { return token(JavaTokenType.MINUS); }
<YYINITIAL> "*" { return token(JavaTokenType.STAR); }
<YYINITIAL> "/" { return token(JavaTokenType.SLASH); }
<YYINITIAL> "&" { return token(JavaTokenType.AMPERSAND); }
<YYINITIAL> "|" { return token(JavaTokenType.VERTICAL_BAR); }
<YYINITIAL> "^" { return token(JavaTokenType.CARET); }
<YYINITIAL> "%" { return token(JavaTokenType.PERCENT); }

/* Multi-character operators. The scanner prefers the longest match, so
 * these win over the single-character rules whenever they apply. */
<YYINITIAL> "==" { return token(JavaTokenType.IS_EQUAL); }
<YYINITIAL> "<=" { return token(JavaTokenType.IS_LESS_EQUAL); }
<YYINITIAL> ">=" { return token(JavaTokenType.IS_GREATER_EQUAL); }
<YYINITIAL> "!=" { return token(JavaTokenType.IS_NOT_EQUAL); }
<YYINITIAL> "||" { return token(JavaTokenType.LOGICAL_OR); }
<YYINITIAL> "&&" { return token(JavaTokenType.LOGICAL_AND); }
<YYINITIAL> "++" { return token(JavaTokenType.INCREMENT); }
<YYINITIAL> "--" { return token(JavaTokenType.DECREMENT); }
<YYINITIAL> ">>" { return token(JavaTokenType.SHIFT_RIGHT); }
<YYINITIAL> "<<" { return token(JavaTokenType.SHIFT_LEFT); }
<YYINITIAL> ">>>" { return token(JavaTokenType.SHIFT_RIGHT_LOGICAL); }
<YYINITIAL> "+=" { return token(JavaTokenType.ADD_ASSIGN); }
<YYINITIAL> "-=" { return token(JavaTokenType.SUBTRACT_ASSIGN); }
<YYINITIAL> "*=" { return token(JavaTokenType.MULTIPLY_ASSIGN); }
<YYINITIAL> "/=" { return token(JavaTokenType.DIVIDE_ASSIGN); }
<YYINITIAL> "&=" { return token(JavaTokenType.AND_ASSIGN); }
<YYINITIAL> "|=" { return token(JavaTokenType.OR_ASSIGN); }
<YYINITIAL> "^=" { return token(JavaTokenType.XOR_ASSIGN); }
<YYINITIAL> "%=" { return token(JavaTokenType.MOD_ASSIGN); }
<YYINITIAL> "<<=" { return token(JavaTokenType.SHIFT_LEFT_ASSIGN); }
<YYINITIAL> ">>=" { return token(JavaTokenType.SHIFT_RIGHT_ASSIGN); }
<YYINITIAL> ">>>=" { return token(JavaTokenType.SHIFT_RIGHT_LOGICAL_ASSIGN); }

<YYINITIAL> {Identifier} {
	/* forKeyword chooses the token type for the matched word; boolean
	 * literals additionally carry their value as the token's data. */
	String word = yytext();
	JavaTokenType kind = JavaTokenType.forKeyword(word);
	if(kind != JavaTokenType.BOOLEAN_LITERAL) {
		return token(kind);
	}
	return token(kind, Boolean.valueOf("true".equals(word)));
}

<YYINITIAL> {DecimalNum}|{OctalNum}|{HexNum} {
    /* At this point, the number we found could still be too large.
     * If it is too large, we need to return an error.
     *
     * Decimal literals are range-checked by Integer.decode, which throws
     * when the value does not fit. Hex and octal literals may use all 32
     * bits (0xFFFFFFFF is a valid int literal with value -1), which
     * Integer.decode rejects, so they are decoded as a long and narrowed.
     */
    String text = yytext();
    try {
        Integer value;
        if (text.length() > 1 && text.charAt(0) == '0') {
            // hex or octal: these forms never carry a sign
            long wide = Long.decode(text).longValue();
            if (wide > 0xFFFFFFFFL) {
                return errorToken("Integer constant is out of bounds.");
            }
            value = Integer.valueOf((int) wide);
        } else {
            value = Integer.decode(text);
        }
        return token(JavaTokenType.INTEGER_LITERAL, value);
    } catch (NumberFormatException e) {
    	return errorToken("Integer constant is out of bounds.");
    }
}
<YYINITIAL> {DecimalLong}|{OctalLong}|{HexLong} {
	/* Long.decode does not understand the trailing l/L suffix, so it is
	 * removed before decoding; the token text itself keeps the suffix.
	 * Hex and octal literals may use all 64 bits (0xFFFFFFFFFFFFFFFFL is
	 * a valid long literal with value -1), which Long.decode rejects, so
	 * they are decoded with BigInteger and narrowed.
	 */
	String text = yytext();
	String digits = text.substring(0, text.length() - 1);
	try {
		Long value;
		if(digits.length() > 1 && digits.charAt(0) == '0') {
			// hex or octal: these forms never carry a sign
			char marker = digits.charAt(1);
			java.math.BigInteger wide = (marker == 'x' || marker == 'X')
				? new java.math.BigInteger(digits.substring(2), 16)
				: new java.math.BigInteger(digits.substring(1), 8);
			if(wide.bitLength() > 64) {
				return errorToken("Long constant is out of bounds.");
			}
			value = Long.valueOf(wide.longValue());
		} else {
			value = Long.decode(digits);
		}
		return token(JavaTokenType.LONG_LITERAL, value);
	} catch(NumberFormatException e) {
		return errorToken("Long constant is out of bounds.");
	}
}
<YYINITIAL> {ZeroFloat} {
    /* catch the case of a zero in parsing, so that we do not incorrectly
     * give an error that a number was rounded to zero
     */
	return token(JavaTokenType.FLOAT_LITERAL, Float.valueOf(0.0f));
}
<YYINITIAL> {ZeroDouble} {
	/* Same special case as for floats: a literal written entirely with
	 * zeros is a legitimate zero rather than an underflow.
	 */
	return token(JavaTokenType.DOUBLE_LITERAL, Double.valueOf(0.0));
}
<YYINITIAL> {Float} {
    /* 
     * Stephen Ostermiller reports that Sun's Java implementation
     * does not work quite correctly in terms of MAX_VALUE and MIN_VALUE,
     * and that the following code works around this. In any case,
     * the following definition is not going to hurt us any.
     *
     * A literal that parses to infinity or to zero is reported as out of
     * bounds; literals written entirely with zeros never reach this rule
     * because the ZeroFloat rule comes first.
     */ 
    try {
        Float x = Float.valueOf(yytext());
        // The literal has no sign, so a primitive comparison with zero is
        // sufficient and avoids the deprecated Float constructor.
        if (x.isInfinite() || x.floatValue() == 0.0f) {
        	return errorToken("Float value is out of bounds.");
        } else {
        	return token(JavaTokenType.FLOAT_LITERAL, x);
        }
    } catch (NumberFormatException e) {
    	return errorToken("Float value is out of bounds.");
    }
}
<YYINITIAL> {Double} {
	/* A literal that parses to infinity or to zero is reported as out of
	 * bounds; literals written entirely with zeros never reach this rule
	 * because the ZeroDouble rule comes first.
	 */
    try {
        Double x = Double.valueOf(yytext());
        // The literal has no sign, so a primitive comparison with zero is
        // sufficient and avoids the deprecated Double constructor.
        if (x.isInfinite() || x.doubleValue() == 0.0) {
            return errorToken("Double value is out of bounds.");
        } else {
        	return token(JavaTokenType.DOUBLE_LITERAL, x);
        }
    } catch (NumberFormatException e) {
    	return errorToken("Double value is out of bounds.");
    } 
}

<YYINITIAL> {Character} { 
	/* Strip the surrounding apostrophes, decode any escape sequence, and
	 * accept the literal only if exactly one character remains. */
	String lexeme = yytext();
	int len = lexeme.length();
	if(len < 3) return errorToken("Character constant is missing characters.");

	String value = unescapeString(lexeme.substring(1, len - 1));
	if(value == null) return errorToken("Character constant is invalid.");

	switch(value.length()) {
	case 0:
		return errorToken("Character constant is missing a character.");
	case 1:
		return token(JavaTokenType.CHARACTER_LITERAL, Character.valueOf(value.charAt(0)));
	default:
		return errorToken("Character constant contains multiple characters.");
	}
}
<YYINITIAL> {String} { 
	/* Strip the surrounding quotation marks and decode the escape
	 * sequences; the decoded text becomes the token's data. */
	String lexeme = yytext();
	int len = lexeme.length();
	if(len < 2) {
		return errorToken("string constant missing quotations.");
	}
	String value = unescapeString(lexeme.substring(1, len - 1));
	return (value == null)
		? errorToken("String constant is invalid.")
		: token(JavaTokenType.STRING_LITERAL, value);
}

/* Whitespace produces no token: the empty action lets scanning continue. */
<YYINITIAL> ({WhiteSpace}+) { }

/* Comments are returned as tokens. The DocComment rule is listed before the
 * TradComment rule, so it wins when both match the same text. */
<YYINITIAL> {Comment} { return token(JavaTokenType.COMMENT_END_OF_LINE); }
<YYINITIAL> {DocComment} { return token(JavaTokenType.JAVADOC); }
<YYINITIAL> {TradComment} { return token(JavaTokenType.COMMENT_TRADITIONAL); }

/* Error rules. They come after every well-formed rule, so for a match of
 * equal length the well-formed rule wins; each one returns an ERROR token
 * carrying a message describing the problem. */
<YYINITIAL> {ErrorFloat} { return errorToken("Float constant is invalid."); }
<YYINITIAL> {OpenComment} { return errorToken("Comment must be closed."); }
<YYINITIAL> {MalformedUnclosedCharacter} { return errorToken("Character constant must be closed."); }
<YYINITIAL> {MalformedCharacter} { return errorToken("Character constant is malformed."); }
<YYINITIAL> {MalformedUnclosedString} { return errorToken("String constant must be closed."); }
<YYINITIAL> {MalformedString} { return errorToken("String constant is malformed."); }
<YYINITIAL> {ErrorIdentifier} { return errorToken("Identifier is invalid."); }
