/*
 * Copyright (c) 2005, Carl Burch.
 * 
 * This file is part of the com.cburch.editor package. The latest
 * version is available at http://www.cburch.com/proj/editor/.
 *
 * The com.cburch.editor package is free software; you can redistribute
 * it and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * The com.cburch.editor package is distributed in the hope that it will
 * be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with the com.cburch.editor package; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301  USA
 */
 
 package com.cburch.editor.tokens;

/*
 * This class is very loosely based on Stephen Ostermiller's
 * <a href=http://ostermiller.org/syntax/>syntax highlighting package</a>
 * at <a href=http://ostermiller.org/syntax/><em>http://ostermiller.org/syntax/</em></a>.
 * My changes, though, have been quite substantive, and I may well
 * have introduced errors that Ostermiller did not have. -Carl Burch 
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;

import javax.swing.event.DocumentEvent;
import javax.swing.event.DocumentListener;
import javax.swing.text.*;

import com.cburch.editor.Scanner;
import com.cburch.editor.util.DocumentReader;
import com.cburch.editor.util.WeakEventSupport;

/**
 * The class that implements the basic functionality behind tracking
 * the tokens within a document and notifying other classes about
 * the changes.
 * 
 * @author Carl Burch
 * @version 0.1 2005-05-31
 */
public class Tokenizer<T extends BasicToken> {
    /**
     * <p>With every change to the document, we back up this many
     * characters from the beginning of the change to start
     * looking at tokens again. This allows us to write a scanner
     * that doesn't have to worry quite so much about partial matches.</p>
     * 
     * <p>For example, suppose we have a single-slash token ('/') and a
     * double-slash token ('//'). (This would happen in Java, for
     * example.) Now suppose that we already have a slash in our
     * input. If the user inserts a slash after it, and if there is no
     * backup, then this tokenizer would start at this inserted slash
     * and erroneously identify it as a single-slash token ('/')
     * rather than as part of a double-slash token ('//'). With a
     * backup of 1, it starts one character before the inserted slash
     * and correctly identifies the double-slash token.</p>
     * 
     * <p>You don't need any backup if you write your scanner carefully,
     * however. But that's quite a pain. It's much easier (and not
     * too inefficient) to have a generous BACKUP instead.</p>
     */
    private static final int BACKUP = 4;
    
    /**
     * Controls whether to display changes to the token list to
     * <code>System.err</code>.
     */
    private static final boolean DEBUG = false;
    
    /**
     * The executor that handles asynchronous notification of changes
     * to the token list. It starts out <code>null</code> and is
     * created the first time an event has to be delivered to
     * asynchronous listeners. There's no point in having multiple
     * such threads, so it's a class variable shared by all tokenizers.
     */
    private static Executor executor = null;

    /**
     * Listens for changes to the document text and translates
     * them to requests to rescan document segments. Each request
     * begins <code>BACKUP</code> characters before the edit so that
     * the scanner gets a chance to re-examine text just before it.
     */
    private class MyDocumentListener implements DocumentListener {
        /**
         * Text has been inserted: rescan from shortly before the
         * insertion point through the end of the inserted text.
         * 
         * @param event  describes the offset and length of the insertion.
         */
        public void insertUpdate(DocumentEvent event) {
            int offs = event.getOffset();
            int len = event.getLength();
            if(DEBUG) System.err.println("Tokenizer: insertUpdate " + offs + " " + len);
            // The start offset can be negative for edits near the
            // beginning of the document; scan() clamps the token
            // index that it derives from it.
            scan(offs - BACKUP, offs + len);
        }

        /**
         * Text has been removed: rescan from shortly before the
         * removal point to one character past the removed length.
         * 
         * @param event  describes the offset and length of the removal.
         */
        public void removeUpdate(DocumentEvent event) {
            int offs = event.getOffset();
            int len = event.getLength();
            if(DEBUG) System.err.println("Tokenizer: removeUpdate " + offs + " " + len);
            scan(offs - BACKUP, offs + len + 1);
        }

        /**
         * This method is apparently never called when the text
         * changes - it is called to tell about changes in
         * attributes. Since the highlighting mechanism changes
         * attributes in response to any changes in the token
         * list, I do not want to change the token list here:
         * It would lead to lots of cross-chatter between the
         * highlighter and this Tokenizer.
         */
        public void changedUpdate(DocumentEvent e) { }
    }

    /**
     * Handles the asynchronous firing of a change to the token list.
     * Each instance carries a single event and, when run, delivers
     * it to every asynchronous listener in turn.
     */
    private class AsynchronousFire implements Runnable {
        /** The event to be fired; fixed when the job is created. */
        private final TokenizerEvent<T> event;

        /**
         * Constructs the job for a given event.
         * 
         * @param event  the event to deliver when the job runs.
         */
        AsynchronousFire(TokenizerEvent<T> event) {
            this.event = event;
        }
        
        /**
         * Fires the event out to all asynchronous listeners. The
         * listener list is consulted when the job runs, not when
         * it was queued.
         */
        public void run() {
            for(TokenizerListener<T> listener : asynchronousListeners) {
                listener.rangeReplaced(event);
            }
        }
    }

    /**
     * The scanner that we'll use to retrieve tokens. May be
     * <code>null</code>, in which case the token list is kept empty.
     */
    private Scanner<? extends T> scanner;
    
    /**
     * The document that we are scanning. May be <code>null</code>,
     * in which case the token list is kept empty.
     */
    private Document document;
    
    /**
     * A reader corresponding to the current document, suitable
     * for use by the scanner. It is recreated whenever the
     * document is changed.
     */
    private DocumentReader documentReader;

    /**
     * The list of tokens that are currently in the document.
     */
    private TokenList<T> tokenList = new TokenList<T>();
    
    /**
     * The listener that modifies the token list whenever the
     * document is changed.
     */
    private MyDocumentListener documentListener = new MyDocumentListener();
    
    /**
     * Tracks whether this tokenizer is currently enabled. A
     * tokenizer starts out enabled.
     */
    private boolean enabled = true;

    /**
     * The list of synchronous listeners to changes in the token list.
     * These are notified directly by the thread making the change.
     */
    private WeakEventSupport<TokenizerListener> synchronousListeners
        = new WeakEventSupport<TokenizerListener>();
    
    /**
     * The list of asynchronous listeners to changes in the token list.
     * These are notified through the shared executor.
     */
    private WeakEventSupport<TokenizerListener> asynchronousListeners
        = new WeakEventSupport<TokenizerListener>();

    /**
     * Constructs a <code>Tokenizer</code> for a null scanner and
     * document. This leads to an empty list of tokens, which stays
     * empty until both a document and a scanner have been supplied
     * through <code>setDocument</code> and <code>setScanner</code>.
     */
    public Tokenizer() {
        this(null, null);
    }

    /**
     * Constructs a <code>Tokenizer</code> that tracks the given
     * document using the given scanner. If the document is not
     * <code>null</code>, the tokenizer starts listening to it for
     * changes. The constructor itself performs no scan, so the
     * token list starts out empty.
     * 
     * @param document  the initial document to be scanned, possibly
     *    <code>null</code>.
     * @param scanner   the initial scanner to use on the document,
     *    possibly <code>null</code>.
     */
    public Tokenizer(Document document, Scanner<? extends T> scanner) {
        this.document = document;
        this.scanner = scanner;
        this.documentReader = new DocumentReader(document);
        if(document != null) {
            document.addDocumentListener(this.documentListener);
        }
    }
    
    /**
     * <p>Registers a listener to be told whenever the token list
     * changes. A listener is either <em>synchronous</em> (notified
     * immediately after the change takes place) or
     * <em>asynchronous</em> (notified shortly after the change, by
     * another thread).</p>
     * 
     * <p>A listener must be asynchronous if it will alter the document
     * in any way: The Java API prevents these alterations from taking
     * place when it is in the process of sending out modifications
     * to the document. A syntax highlighter, in particular, must be
     * asynchronous, because the changes to character styles in the
     * document constitute changes to the document.</p>
     *  
     * <p>An example where a synchronous listener is useful is in
     * parenthesis matching. Here, when the user inserts a parenthesis,
     * we would want a listener to compute the parenthesis matches
     * immediately, so that the subsequent change to caret position
     * can be based on the new matching information.</p>
     * 
     * <p>Generally, a listener should be synchronous unless it will
     * change the document.</p>
     * 
     * @param listener  the listener to be added.
     * @param synchronous  <code>true</code> if the listener should
     *    be notified immediately after the change,
     *    <code>false</code> if it should be notified by another thread.
     */
    public void addTokenizerListener(TokenizerListener listener,
            boolean synchronous) {
        // Pick the list matching the requested delivery mode.
        WeakEventSupport<TokenizerListener> target = synchronous
                ? synchronousListeners
                : asynchronousListeners;
        target.add(listener);
    }
    
    /**
     * Removes the designated listener from being notified, whether
     * it is synchronous or asynchronous. Removal is attempted on
     * both listener lists, so the caller need not remember how the
     * listener was registered.
     * 
     * @param listener  the listener to be removed.
     */
    public void removeTokenizerListener(TokenizerListener listener) {
        synchronousListeners.remove(listener);
        asynchronousListeners.remove(listener);
    }
    
    /**
     * Returns the list of tokens that always holds the current
     * list as maintained by this tokenizer. The object returned is
     * the tokenizer's own list rather than a copy, so it reflects
     * later changes made by the tokenizer.
     *  
     * @return the list of tokens in the document.
     */
    public TokenList<T> getTokenList() {
        return tokenList;
    }
    
    /**
     * Returns the scanner used for finding tokens.
     * 
     * @return the scanner currently in use, or <code>null</code>
     *    if no scanner has been set.
     */
    public Scanner<? extends T> getScanner() {
        return scanner;
    }
    
    /**
     * Changes the scanner used for finding tokens. This will
     * necessitate scanning the entire document, which is done
     * before this method returns and without regard to whether
     * the tokenizer is enabled. A <code>null</code>
     * value will result in an empty token list.
     * 
     * @param value  the new scanner to use.
     */
    public void setScanner(Scanner<? extends T> value) {
        scanner = value;
        scanAll();
    }
    
    /**
     * Returns the document currently tracked by this tokenizer.
     * 
     * @return the document tracked, or <code>null</code> if no
     *    document has been set.
     */
    public Document getDocument() {
        return document;
    }
    
    /**
     * Changes the document whose tokens are tracked. While the
     * tokenizer is enabled, its document listener is moved from
     * the previous document to the new one. In every case the
     * document reader is rebuilt for the new document and the whole
     * document is rescanned.
     * 
     * @param value  the document whose tokens should be tracked,
     *   or <code>null</code> if we should use the empty document.
     */
    public void setDocument(Document value) {
        // Detach from the document being replaced, if we were attached.
        Document previous = document;
        if(enabled && previous != null) {
            previous.removeDocumentListener(documentListener);
        }

        document = value;

        // Attach to the replacement, if there is one to attach to.
        if(enabled && value != null) {
            value.addDocumentListener(documentListener);
        }

        documentReader = new DocumentReader(value);
        scanAll();
    }
    
    /**
     * Indicates whether the tokenizer is currently enabled. A
     * newly constructed tokenizer is enabled.
     *  
     * @return <code>true</code> if the tokenizer is enabled.
     */
    public boolean isEnabled() {
        return enabled;
    }
    
    /**
     * Enables or disables the tokenizer. Disabling the tokenizer
     * stops it from listening to the document, which freezes the
     * token list in its current state until the tokenizer is
     * enabled again. Enabling the tokenizer resumes listening and
     * forces a rescan of the entire document. Requesting the state
     * the tokenizer is already in does nothing.
     * 
     * <p>It is safe to call this when no document is set: only the
     * flag changes, and on enabling the rescan leaves the token
     * list empty.</p>
     * 
     * @param value  <code>true</code> to enable, <code>false</code>
     *     to disable.
     */
    public void setEnabled(boolean value) {
        if(value == enabled) return;
        enabled = value;

        // The document may legitimately be null (for example after
        // the no-argument constructor), so the listener must only be
        // touched when there is a document to attach it to.
        Document doc = document;
        if(value) {
            if(doc != null) doc.addDocumentListener(documentListener);
            scanAll();
        } else if(doc != null) {
            doc.removeDocumentListener(documentListener);
        }
    }

    /**
     * Empties the token list and reports the removal to listeners.
     * This is what happens when either the current document or the
     * current scanner is <code>null</code>. If the list is already
     * empty, nothing happens and no event is fired.
     */
    private void clearTokenList() {
        if(tokenList.isEmpty()) return;

        // Copy the tokens out before wiping the list so that the
        // event can report what was removed.
        List<T> removed = new ArrayList<T>(tokenList);
        tokenList.doClear();
        List<T> nothing = new ArrayList<T>();
        fireEvent(0, removed, nothing);
    }
    
    /**
     * Rescans the document from its first character to its last,
     * replacing the entire token list. With no document set, the
     * token list is simply cleared.
     */
    private void scanAll() {
        // Work from a local copy so that the null check and the
        // length query refer to the same document.
        Document doc = this.document;
        if(doc != null) {
            scan(0, doc.getLength());
            return;
        }
        clearTokenList();
    }

    /**
     * Scans a segment of the document, starting at some point before
     * the given segment and going past the end of the given
     * segment, and splices the tokens found into the token list.
     * Scanning continues beyond <code>stop</code> until the scanner
     * produces a token equal to one already in the list, or until
     * the scanner runs out of tokens. Listeners are notified only if
     * the token list actually changed.
     * 
     * <p>If either the scanner or the document is <code>null</code>,
     * the token list is cleared instead.</p>
     * 
     * @param start  the offset of the segment's beginning location.
     *    Callers may pass a negative value (they subtract
     *    <code>BACKUP</code> from the edit offset); the token index
     *    derived from it is clamped to the valid range.
     * @param stop   the offset of the segment's ending location, exclusive.
     * @throws RuntimeException  wrapping any <code>IOException</code>
     *    raised by the scanner.
     */
    private void scan(int start, int stop) {
        // slurping these up into local variables in case another
        // thread changes them
        // (The cast is unchecked: the field is declared as
        // Scanner<? extends T>.)
        Scanner<T> scanner = (Scanner<T>) this.scanner;
        Document document = this.document;
        if(scanner == null || document == null) {
            clearTokenList();
            return;
        }
        
        // Find the index of the first token I want to replace,
        // clamped so that it is a valid index into the list (or 0
        // when the list is empty).
        int oldStart = tokenList.getIndexEndingBefore(start);
        if(oldStart >= tokenList.size()) oldStart = tokenList.size() - 1;
        if(oldStart < 0) oldStart = 0;
        
        // oldEnd will be the exclusive end index of the range of old
        // tokens to be replaced; endToken will be the token currently
        // at that index, or null once we are past the end of the list.
        int oldEnd = oldStart;
        T endToken = (oldEnd < tokenList.size() ? tokenList.get(oldEnd) : null);
        
        // The tokens produced by this rescan, in order.
        ArrayList<T> added = new ArrayList<T>();

        // Reset the scanner so that it thinks it is
        // starting at the beginning of the document but
        // reporting a funny start position. The reset
        // causes the Reader's close method to be called
        // but this causes no problems with a DocumentReader.
        if(oldStart > 0) {
            // Resume right after the last token we are keeping.
            T lastOkToken = tokenList.get(oldStart - 1);
            int offsetBegin = lastOkToken.getEndOffset();
            try {
                scanner.reset(documentReader, lastOkToken);
            } catch(IOException e) { throw new RuntimeException(e); }
            documentReader.seek(offsetBegin);
        } else {
            // Nothing is being kept in front: start from the top.
            try {
                scanner.reset(documentReader);
            } catch(IOException e) { throw new RuntimeException(e); }
            documentReader.seek(0);
        }
        
        while(true) {
            // Retrieve the next token. If it is null, we've reached
            // the document's end, and we can stop. In that case every
            // old token from oldStart onward is to be replaced.
            T t;
            try {
                t = scanner.nextToken();
            } catch(IOException e) { throw new RuntimeException(e); }
            if(t == null) {
                oldEnd = tokenList.size();
                endToken = null;
                break;
            }

            // Skip past any old tokens that precede this token,
            // removing them. (Tokens are compared by end offset.)
            int tOffset = t.getEndOffset();
            while(endToken != null && endToken.getEndOffset() < tOffset) {
                ++oldEnd;
                endToken = (oldEnd < tokenList.size() ? tokenList.get(oldEnd) : null);
            }
            
            // If we've passed the end of the segment to recolor,
            // and we've reached a token matching what we had before,
            // then we can stop the update. The matching old token at
            // index oldEnd is kept, and t is not added.
            if(tOffset > stop && t.equals(endToken)) {
                break;
            }

            added.add(t);
        }
        
        // It may well be that there are some duplicated tokens at
        // the beginning or the end; we'll trim off the duplicates.
        // [newStart, newEnd) is the range of added that survives.
        int newStart = 0;
        int newEnd = added.size();
        while(oldStart < oldEnd && newStart < newEnd
                && tokenList.get(oldStart).equals(added.get(newStart))) {
            ++oldStart;
            ++newStart;
        }
        while(oldStart < oldEnd && newStart < newEnd
                && tokenList.get(oldEnd - 1).equals(added.get(newEnd - 1))) {
            --oldEnd;
            --newEnd;
        }
        
        if(DEBUG) {
            System.err.println("Tokenizer: scan " + start + " " + stop); 
            System.err.println("  replacing "
                    + (oldEnd - oldStart) + " tokens at " + oldStart
                    + " with " + (newEnd - newStart) + " tokens");
            for(int i = oldStart; i < oldEnd; i++) {
                System.err.println("  removing " + tokenList.get(i));
            }
            for(int i = newStart; i < newEnd; i++) {
                System.err.println("  adding   " + added.get(i));
            }
            System.err.println("  stopped at " + (endToken == null ? "EOF" : endToken));
        }
        
        // Nothing left on either side after trimming: the rescan
        // reproduced the existing tokens, so there is no change
        // to make or report.
        if(oldStart >= oldEnd && newStart >= newEnd) return;
        // Copy the outgoing tokens before the list is modified, so
        // that the event still describes them afterward.
        List<T> oldTokens = Collections.unmodifiableList(
                new ArrayList<T>(tokenList.subList(oldStart, oldEnd)));
        List<T> newTokens = Collections.unmodifiableList(
                added.subList(newStart, newEnd));
        tokenList.replace(oldStart, oldEnd, newTokens);
        fireEvent(oldStart, oldTokens, newTokens);
    }
    
    /**
     * Fire an event to all listeners notifying them of a change in
     * the list of tokens, handling synchronous and asynchronous
     * listeners appropriately. Synchronous listeners are called
     * directly, in the calling thread, before this method returns.
     * If there are any asynchronous listeners, a job delivering the
     * same event to them is queued on the shared executor.
     * 
     * @param from  the starting index of the range. 
     * @param oldTokens  the tokens in the segment previous to the change.
     * @param newTokens  the tokens in the segment following the change.
     */
    private void fireEvent(int from, List<T> oldTokens, List<T> newTokens) {
        TokenizerEvent<T> e = new TokenizerEvent<T>(this,
                from, oldTokens, newTokens);
        for(TokenizerListener<T> l : synchronousListeners) {
            l.rangeReplaced(e);
        }
        
        if(!asynchronousListeners.isEmpty()) {
            // queue the job up for executor to fire the event
            // to asynchronous listeners
            getExecutor().execute(new AsynchronousFire(e));
        }
    }

    /**
     * Returns the executor shared by all tokenizers, creating it on
     * first use. The method is synchronized so that tokenizers
     * firing from different threads cannot each create an executor.
     * It is static so that the thread factory does not hold a
     * reference to whichever tokenizer happened to fire first.
     * 
     * @return the single-threaded executor for asynchronous notification.
     */
    private static synchronized Executor getExecutor() {
        if(executor == null) {
            executor = Executors.newSingleThreadExecutor(
                    new java.util.concurrent.ThreadFactory() {
                        public Thread newThread(Runnable job) {
                            // A daemon thread, so that an idle
                            // notifier never keeps the JVM from
                            // exiting once the application is done.
                            Thread worker = new Thread(job, "Tokenizer-notifier");
                            worker.setDaemon(true);
                            return worker;
                        }
                    });
        }
        return executor;
    }
}
