/*
 * HtmlScanner.java is a generated file. Do not edit it directly; instead,
 * edit the JFlex specification and use JFlex to regenerate HtmlScanner.java.
 * <a href=http://jflex.de>JFlex</a> is available at
 * <a href=http://jflex.de><em>http://jflex.de/</em></a>.
 */
 
/*
 * This file is loosely based on Stephen Ostermiller's
 * <a href=http://ostermiller.org/syntax/>syntax highlighting package</a>
 * at <a href=http://ostermiller.org/syntax/><em>http://ostermiller.org/syntax/</em></a>.
 * My changes may well have introduced errors that Ostermiller did not
 * have. -Carl Burch 
 */
 
/**
 * Implements a scanner for retrieving HTML tokens from a
 * stream.
 *
 * @author Carl Burch
 * @version 0.1 2005-05-31
 */

package com.cburch.editor.scanners;

import java.io.Reader;
import java.io.StringReader;

import javax.swing.text.Document;
import javax.swing.text.Position;
import javax.swing.text.BadLocationException;

import com.cburch.editor.Token;
import com.cburch.editor.Scanner;
import com.cburch.editor.util.DocumentReader;

%%

/* Declare the generated scanner class as public. */
%public
/* Name of the generated scanner class. */
%class HtmlScanner
/* Interface the generated class is declared to implement. */
%implements Scanner<Token>
/* Name of the generated scanning method. */
%function nextToken
/* Return type of the scanning method (the nested class declared below). */
%type HtmlToken

/* Scan Unicode input. */
%unicode
/* Enable character counting; the user code below reads and writes yychar. */
%char

%{
	/**
	 * Represents a token from an HTML file. Because the scanner has state,
	 * we need each token to remember that state so that the scanner can
	 * be reset to that token appropriately.
	 */
	private static class HtmlToken extends Token {
		// Because the HtmlScanner uses state, we'll need to store
		// that information within each token.
		private int state;
		
		public HtmlToken(HtmlTokenType type, Position offset, String text,
				int state, Object data) {
			super(type, offset, text, data);
			this.state = state;
		}
		
		public HtmlToken(HtmlTokenType type, Position offset, String text,
				int state) {
			super(type, offset, text);
			this.state = state;
		}
		
		// We also need the state to be taken into account in
		// testing for equality, so that the tokenizer won't stop
		// on a similar token if it has a different state.
		public boolean equals(Object other) {
			if(!(other instanceof HtmlToken)) return false;
			return super.equals(other)
				&& this.state == ((HtmlToken) other).state;
		}
	}

	/**
	 * Remembers the current document being edited. This is set by the
	 * <code>reset</code> methods: it holds the document of the reader when
	 * that reader is a <code>DocumentReader</code>, and <code>null</code>
	 * otherwise. When it is <code>null</code>, created tokens receive a
	 * <code>null</code> position.
	 */
	private Document document = null;
	
	/**
	 * Constructs an HtmlScanner reading from an empty string, with no
	 * document attached. This placeholder input will never be used; the
	 * tokenizer will always initialize the reader through a
	 * <code>reset</code> method before ever reading any tokens.
	 */
	public HtmlScanner() {
		// Delegates to the constructor taking a Reader.
		this(new StringReader(""));
	}

    /**
     * Resets the scanner to continue scanning just after a previously
     * produced token. The character count is set to the token's end
     * offset and the lexical state is restored to the one remembered
     * by the token.
     *
     * @param reader  the reader to scan from.
     * @param token   a token previously produced by this scanner; it must
     *                be an HtmlToken.
     * @throws ClassCastException    if the token is not an HtmlToken.
     * @throws NullPointerException  if the token is <code>null</code>.
     */
    public void reset(Reader reader, Token token) {
    	// Read everything needed from the token before touching the
    	// scanner, so that an unusable token is rejected without leaving
    	// the scanner half-reset.
    	HtmlToken ht = (HtmlToken) token;
    	int endOffset = ht.getEndOffset();
    	int state = ht.state;
    	
    	// Attach the reader (and its document, if any), then restore the
    	// position and lexical state remembered by the token.
    	reset(reader);
    	this.yychar = endOffset;
    	yybegin(state);
    }
	
	/**
	 * Resets the scanner to read from the given reader. If the reader is
	 * a <code>DocumentReader</code>, its document is remembered so that
	 * positions can be created for tokens; otherwise no document is
	 * remembered.
	 *
	 * @param reader  the reader to scan from.
	 */
	public void reset(Reader reader) {
		document = (reader instanceof DocumentReader)
			? ((DocumentReader) reader).getDocument()
			: null;
		yyreset(reader);
	}

	/**
	 * Creates a token of the given type for the text just matched. This
	 * simplifies the code for handling individual tokens.
	 *
	 * @param type  the type to associate with the created token.
	 * @return the created token.
	 */
    private HtmlToken token(HtmlTokenType type) {
    	String text = yytext();
    	return new HtmlToken(type, endPosition(text), text, yystate());
    }
    
	/**
	 * Creates a token of the given type, carrying the given data, for the
	 * text just matched. This simplifies the code for handling individual
	 * tokens.
	 *
	 * @param type  the type to associate with the created token.
	 * @param data  the data to associate with the created token.
	 * @return the created token.
	 */
    private HtmlToken token(HtmlTokenType type, Object data) {
    	String text = yytext();
    	return new HtmlToken(type, endPosition(text), text, yystate(), data);
    }
    
	/**
	 * Creates a document position marking the end of the text just
	 * matched, that is, the current character count plus the length of
	 * the text.
	 *
	 * @param text  the text just matched.
	 * @return the position, or <code>null</code> if no document is
	 *     attached or the document rejects the offset.
	 */
    private Position endPosition(String text) {
    	if(document == null) return null;
    	
    	int endOffs = yychar + text.length();
    	try {
    		return document.createPosition(endOffs);
    	} catch(BadLocationException ignored) {
    		// Best effort: a token without a position is still returned
    		// to the caller rather than aborting the scan.
    		return null;
    	}
    }
    
    /**
     * Builds a token of type <code>HtmlTokenType.ERROR</code> for the
     * text just matched and attaches the given message to it.
     *
     * @param message  the error message to attach to the token.
     * @return the error token.
     */
    private HtmlToken errorToken(String message) {
    	final HtmlToken error = token(HtmlTokenType.ERROR);
    	error.setErrorMessage(message);
    	return error;
    }
%}

/*
 * Lexical state used while inside a hyperlink. The Tag rule enters it
 * after an A start tag that has an HREF attribute and returns to
 * YYINITIAL on an A end tag.
 */
%state ANCHOR

/* A single space, tab, newline, carriage return, or form feed. */
Whitespace=([\ \t\n\r\f])

/* A single- or double-quoted string containing no quote of the same kind. */
QuotedText=(\'[^\']*\'|\"[^\"]*\")
/* A possibly empty run of characters other than quotes and ">". */
UnquotedText=([^\'\">]*)

/*
 * A complete comment: the opening "<!--", a body in which every "-" is
 * followed by a character other than "-", then "--", optional
 * whitespace, and the closing ">".
 */
Comment=("<!--"([^\-]|([\-][^\-]))*"--"{Whitespace}*>)
/* The same as Comment but lacking the closing ">" (and possibly the "--"). */
UnfinishedComment=("<!--"([^\-]|([\-][^\-]))*("--"{Whitespace}*)?)
/* A complete tag: "<", any mix of quoted and unquoted text, then ">". */
Tag=("<"({QuotedText}|{UnquotedText})*">")
/* The same as Tag but lacking the closing ">". */
UnfinishedTag=("<"({QuotedText}|{UnquotedText})*)

/* A decimal digit or a letter from a to f in either case. */
HexDigit=([:digit:]|[a-fA-F])
/*
 * A well-formed character reference: "&", then a name made of letters,
 * or "#" and decimal digits, or "#x" / "#X" and hex digits, then ";".
 */
CharacterRef=(\&([:letter:]+|\#[:digit:]+|\#[xX]{HexDigit}+)\;)
/*
 * "&", exactly one character other than ";", then ";".
 * NOTE(review): only a single character is allowed between "&" and ";",
 * so a longer malformed reference is matched by UnclosedCharacterRef
 * instead and reported as unclosed. Possibly a repetition was intended
 * here; confirm before changing, since it affects which rule wins.
 */
BadCharacterRef=(\&[^;]\;)
/*
 * "&" followed by a possibly empty run of characters other than ";".
 * The run is not limited to name characters, so it can extend over
 * tags and line breaks up to the next ";" or the end of input.
 */
UnclosedCharacterRef=(\&[^;]*)

/*
 * A possibly empty run of characters other than "<" and "&", mixed with
 * well-formed character references.
 */
Text=(([^<\&]|{CharacterRef})*)

%%

/*
 * Plain text is reported with a different token type depending on
 * whether the scanner is currently in the ANCHOR state.
 */
<YYINITIAL> {Text} { return token(HtmlTokenType.TEXT); }
<ANCHOR> {Text} { return token(HtmlTokenType.ANCHOR_TEXT); }
/*
 * Malformed character references become error tokens. JFlex prefers the
 * longest match and, among matches of equal length, the rule listed
 * first, so the order of these rules matters.
 */
<YYINITIAL, ANCHOR> {UnclosedCharacterRef} { return errorToken("Character reference must be closed with \";\"."); }
<YYINITIAL, ANCHOR> {BadCharacterRef} { return errorToken("Character reference is invalid."); }
/* Complete comments are ordinary tokens; unterminated ones are errors. */
<YYINITIAL, ANCHOR> {Comment} { return token(HtmlTokenType.COMMENT); }
<YYINITIAL, ANCHOR> {UnfinishedComment} { return errorToken("Comment must be closed with \"-->\"."); }
<YYINITIAL, ANCHOR> {Tag} {
	// Let HtmlTag analyze the matched text; any problem it reports
	// turns the whole tag into an error token.
	HtmlTag parsed = HtmlTag.create(yytext());
	if(parsed.getError() != null) {
		return errorToken(parsed.getError());
	}

	// Unknown and deprecated tags get their own token types, each with
	// an explanatory message attached.
	if(!parsed.isKnown()) {
		HtmlToken unknown = token(HtmlTokenType.UNKNOWN_TAG);
		unknown.setErrorMessage(parsed.getName() + " tag is not defined in HTML 4.0.");
		return unknown;
	}
	if(parsed.isDeprecated()) {
		HtmlToken deprecated = token(HtmlTokenType.DEPRECATED_TAG);
		deprecated.setErrorMessage(parsed.getName() + " tag is deprecated in HTML 4.0.");
		return deprecated;
	}

	// Switch lexical state around hyperlinks before creating the token,
	// so that the token remembers the state that follows the tag.
	if(parsed.getName().equals("A")) {
		if(parsed.isEndTag()) {
			yybegin(YYINITIAL);
		} else if(parsed.containsAttribute("HREF")) {
			yybegin(ANCHOR);
		}
	}
	return token(HtmlTokenType.TAG, parsed.getName());
}
/* A tag with no closing ">" is reported as an error token. */
<YYINITIAL, ANCHOR> {UnfinishedTag} { return errorToken("Tag must be closed with \">\"."); }
