|
What this is
Other links
The source code/* * TokenMarker.java - Tokenizes lines of text * :tabSize=8:indentSize=8:noTabs=false: * :folding=explicit:collapseFolds=1: * * Copyright (C) 1998, 2003 Slava Pestov * Copyright (C) 1999, 2000 mike dillon * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package org.gjt.sp.jedit.syntax; //{{{ Imports import gnu.regexp.*; import javax.swing.text.Segment; import java.util.*; import org.gjt.sp.jedit.*; import org.gjt.sp.util.CharIndexedSegment; import org.gjt.sp.util.Log; //}}} /** * A token marker splits lines of text into tokens. Each token carries * a length field and an identification tag that can be mapped to a color * or font style for painting that token. 
*
 * <p>The marker keeps the state of the line currently being parsed in
 * instance fields (see the note on the instance variables), so a single
 * instance must not be used by several threads at the same time.
 *
 * @author Slava Pestov, mike dillon
 * @version $Id: TokenMarker.java,v 1.62 2003/12/27 05:14:46 spestov Exp $
 *
 * @see org.gjt.sp.jedit.syntax.Token
 * @see org.gjt.sp.jedit.syntax.TokenHandler
 */
public class TokenMarker
{
	//{{{ TokenMarker constructor
	/**
	 * Creates a token marker with an empty rule set table and no
	 * main rule set.
	 */
	public TokenMarker()
	{
		ruleSets = new Hashtable(64);
	} //}}}

	//{{{ addRuleSet() method
	/**
	 * Registers a rule set under the name reported by its
	 * <code>getSetName()</code> method. A rule set named "MAIN" is
	 * additionally remembered as the main rule set.
	 * @param rules The rule set to register
	 */
	public void addRuleSet(ParserRuleSet rules)
	{
		ruleSets.put(rules.getSetName(), rules);

		if (rules.getSetName().equals("MAIN"))
			mainRuleSet = rules;
	} //}}}

	//{{{ getMainRuleSet() method
	/**
	 * Returns the main rule set, i.e. the one <code>addRuleSet()</code>
	 * stored because it was named "MAIN".
	 */
	public ParserRuleSet getMainRuleSet()
	{
		return mainRuleSet;
	} //}}}

	//{{{ getRuleSet() method
	/**
	 * Looks up a registered rule set by name.
	 * @param setName The name the rule set was registered under
	 * @return The matching rule set, or null if the table has no
	 * entry for that name
	 */
	public ParserRuleSet getRuleSet(String setName)
	{
		return (ParserRuleSet) ruleSets.get(setName);
	} //}}}

	//{{{ getRuleSets() method
	/**
	 * Returns all registered rule sets as a newly allocated array.
	 * @since jEdit 4.2pre3
	 */
	public ParserRuleSet[] getRuleSets()
	{
		return (ParserRuleSet[])ruleSets.values().toArray(new ParserRuleSet[ruleSets.size()]);
	} //}}}

	//{{{ markTokens() method
	/**
	 * Do not call this method directly; call Buffer.markTokens() instead.
	 *
	 * <p>Scans the characters <code>line.offset</code> (inclusive) to
	 * <code>line.offset + line.count</code> (exclusive) of
	 * <code>line.array</code> and reports the tokens found to the token
	 * handler. Offsets passed to the handler are relative to
	 * <code>line.offset</code>. The last token reported is always a
	 * zero-length <code>Token.END</code>.
	 *
	 * @param prevContext The context to continue from; if null, parsing
	 * starts in the main rule set. (Presumably the context returned for
	 * the preceding line -- confirm against the caller.)
	 * @param tokenHandler Receives a <code>handleToken()</code> call per
	 * token and a final <code>setLineContext()</code> call
	 * @param line The text to tokenize
	 * @return The context in effect at the end of the line, after
	 * <code>intern()</code> has been applied to it
	 */
	public LineContext markTokens(LineContext prevContext,
		TokenHandler tokenHandler, Segment line)
	{
		//{{{ Set up some instance variables
		// this is to avoid having to pass around lots and lots of
		// parameters.
		this.tokenHandler = tokenHandler;
		this.line = line;

		// lastOffset: start of the text not yet reported as a token.
		// lineLength: absolute index in line.array one past the last
		// character, not a character count.
		lastOffset = line.offset;
		lineLength = line.count + line.offset;

		// Work on a fresh context so that prevContext itself is never
		// modified by this call.
		context = new LineContext();

		if(prevContext == null)
			context.rules = getMainRuleSet();
		else
		{
			context.parent = prevContext.parent;
			context.inRule = prevContext.inRule;
			context.rules = prevContext.rules;
			context.spanEndSubst = prevContext.spanEndSubst;
		}

		keywords = context.rules.getKeywords();
		escaped = false;

		// whitespaceEnd ends up as the index just past the leading
		// whitespace; it only advances until seenWhitespaceEnd is set.
		seenWhitespaceEnd = false;
		whitespaceEnd = line.offset;
		//}}}

		//{{{ Main parser loop
		ParserRule rule;
		int terminateChar = context.rules.getTerminateChar();
		boolean terminated = false;

		// pos is an instance field: the helpers called below read it and
		// may advance it past a multi-character match.
		main_loop:
		for(pos = line.offset; pos < lineLength; pos++)
		{
			//{{{ check if we have to stop parsing
			// A negative terminateChar disables this check. Otherwise,
			// the first time the line-relative position reaches it, a new
			// context using the standard rule set for the current default
			// token is created, with the old context as second constructor
			// argument (presumably the parent; the unwind step below
			// pops it again when terminated is set).
			if(terminateChar >= 0 && pos - line.offset >= terminateChar
				&& !terminated)
			{
				terminated = true;
				context = new LineContext(ParserRuleSet
					.getStandardRuleSet(context.rules
					.getDefault()),context);
				keywords = context.rules.getKeywords();
			} //}}}

			//{{{ check for end of delegate
			// When inside a delegated rule set, the rule active in the
			// parent context gets the first chance to match its end here.
			if(context.parent != null)
			{
				rule = context.parent.inRule;
				if(rule != null)
				{
					if(checkDelegateEnd(rule))
					{
						seenWhitespaceEnd = true;
						continue main_loop;
					}
				}
			} //}}}

			//{{{ check every rule
			char ch = line.array[pos];

			// getRules(ch) yields the head of a list linked through
			// ParserRule.next; the first rule that matches wins.
			rule = context.rules.getRules(ch);
			while(rule != null)
			{
				// stop checking rules if there was a match
				if (handleRule(rule,false))
				{
					seenWhitespaceEnd = true;
					continue main_loop;
				}

				rule = rule.next;
			} //}}}

			//{{{ check if current character is a word separator
			if(Character.isWhitespace(ch))
			{
				if(!seenWhitespaceEnd)
					whitespaceEnd = pos + 1;

				if(context.inRule != null)
					handleRule(context.inRule,true);

				handleNoWordBreak();

				// NOTE(review): markKeyword() is defined outside this
				// view; it presumably reports the pending word and moves
				// lastOffset -- confirm before relying on this.
				markKeyword(false);

				// Report whatever is still pending before the whitespace
				// as a default token ...
				if(lastOffset != pos)
				{
					tokenHandler.handleToken(line,
						context.rules.getDefault(),
						lastOffset - line.offset,
						pos - lastOffset,
						context);
				}

				// ... then the whitespace character itself as a
				// one-character default token.
				tokenHandler.handleToken(line,
					context.rules.getDefault(),
					pos - line.offset,1,context);
				lastOffset = pos + 1;

				escaped = false;
			}
			else
			{
				// Word-break handling is skipped entirely when the rule
				// set has neither keywords nor rules.
				if(keywords != null || context.rules.getRuleCount() != 0)
				{
					String noWordSep = context.rules.getNoWordSep();

					// A character separates words unless it is a letter,
					// a digit, or listed in the rule set's noWordSep
					// string.
					if(!Character.isLetterOrDigit(ch)
						&& noWordSep.indexOf(ch) == -1)
					{
						if(context.inRule != null)
							handleRule(context.inRule,true);

						handleNoWordBreak();

						markKeyword(true);

						// One-character default token starting at
						// lastOffset (not pos).
						tokenHandler.handleToken(line,
							context.rules.getDefault(),
							lastOffset - line.offset,1,
							context);
						lastOffset = pos + 1;
					}
				}

				// Any non-whitespace character ends the leading
				// whitespace and cancels a pending escape.
				seenWhitespaceEnd = true;
				escaped = false;
			} //}}}
		} //}}}

		//{{{ Mark all remaining characters
		// Run the end-of-word steps once more with pos at the end of the
		// line so that trailing text is handled too.
		pos = lineLength;

		if(context.inRule != null)
			handleRule(context.inRule,true);

		handleNoWordBreak();
		markKeyword(true);
		//}}}

		//{{{ Unwind any NO_LINE_BREAK parent delegates
		// Pop parent contexts while the parent's active rule has the
		// NO_LINE_BREAK flag, or unconditionally when the line was
		// terminated; stop at the first parent that does not qualify.
		unwind:
		while(context.parent != null)
		{
			rule = context.parent.inRule;
			if((rule != null && (rule.action
				& ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
				|| terminated)
			{
				context = context.parent;
				keywords = context.rules.getKeywords();
				context.inRule = null;
			}
			else
				break unwind;
		} //}}}

		// Zero-length END token at the end of the line.
		tokenHandler.handleToken(line,Token.END,
			pos - line.offset,0,context);

		context = context.intern();
		tokenHandler.setLineContext(context);
		return context;
	} //}}}

	//{{{ Private members

	//{{{ Instance variables
	// Rule sets keyed by the name from ParserRuleSet.getSetName().
	private Hashtable ruleSets;
	// The rule set named "MAIN", if one has been added.
	private ParserRuleSet mainRuleSet;

	// Instead of passing these around to each method, we just store them
	// as instance variables. Note that this is not thread-safe.
private TokenHandler tokenHandler; private Segment line; private LineContext context; private KeywordMap keywords; private Segment pattern = new Segment(); private int lastOffset; private int lineLength; private int pos; private boolean escaped; private int whitespaceEnd; private boolean seenWhitespaceEnd; //}}} //{{{ checkDelegateEnd() method private boolean checkDelegateEnd(ParserRule rule) { if(rule.end == null) return false; LineContext tempContext = context; context = context.parent; keywords = context.rules.getKeywords(); boolean tempEscaped = escaped; boolean b = handleRule(rule,true); context = tempContext; keywords = context.rules.getKeywords(); if(b && !tempEscaped) { if(context.inRule != null) handleRule(context.inRule,true); markKeyword(true); context = (LineContext)context.parent.clone(); tokenHandler.handleToken(line, (context.inRule.action & ParserRule.EXCLUDE_MATCH) == ParserRule.EXCLUDE_MATCH ? context.rules.getDefault() : context.inRule.token, pos - line.offset,pattern.count,context); keywords = context.rules.getKeywords(); context.inRule = null; lastOffset = pos + pattern.count; // move pos to last character of match sequence pos += (pattern.count - 1); return true; } // check escape rule of parent if((rule.action & ParserRule.NO_ESCAPE) == 0) { ParserRule escape = context.parent.rules.getEscapeRule(); if(escape != null && handleRule(escape,false)) return true; } return false; } //}}} //{{{ handleRule() method /** * Checks if the rule matches the line at the current position * and handles the rule if it does match */ private boolean handleRule(ParserRule checkRule, boolean end) { //{{{ Some rules can only match in certain locations if(!end) { if(Character.toUpperCase(checkRule.hashChar) != Character.toUpperCase(line.array[pos])) { return false; } } int offset = ((checkRule.action & ParserRule.MARK_PREVIOUS) != 0) ? lastOffset : pos; int posMatch = (end ? 
checkRule.endPosMatch : checkRule.startPosMatch); if((posMatch & ParserRule.AT_LINE_START) == ParserRule.AT_LINE_START) { if(offset != line.offset) return false; } else if((posMatch & ParserRule.AT_WHITESPACE_END) == ParserRule.AT_WHITESPACE_END) { if(offset != whitespaceEnd) return false; } else if((posMatch & ParserRule.AT_WORD_START) == ParserRule.AT_WORD_START) { if(offset != lastOffset) return false; } //}}} int matchedChars = 1; CharIndexedSegment charIndexed = null; REMatch match = null; //{{{ See if the rule's start or end sequence matches here if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0) { // the end cannot be a regular expression if((checkRule.action & ParserRule.REGEXP) == 0 || end) { if(end) { if(context.spanEndSubst != null) pattern.array = context.spanEndSubst; else pattern.array = checkRule.end; } else pattern.array = checkRule.start; pattern.offset = 0; pattern.count = pattern.array.length; matchedChars = pattern.count; if(!SyntaxUtilities.regionMatches(context.rules .getIgnoreCase(),line,pos,pattern.array)) { return false; } } else { // note that all regexps start with \A so they only // match the start of the string int matchStart = pos - line.offset; charIndexed = new CharIndexedSegment(line,matchStart); match = checkRule.startRegexp.getMatch( charIndexed,0,RE.REG_ANCHORINDEX); if(match == null) return false; else if(match.getStartIndex() != 0) throw new InternalError("Can't happen"); else { matchedChars = match.getEndIndex(); /* workaround for hang if match was * zero-width. 
not sure if there is * a better way to handle this */ if(matchedChars == 0) matchedChars = 1; } } } //}}} //{{{ Check for an escape sequence if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE) { if(context.inRule != null) handleRule(context.inRule,true); escaped = !escaped; pos += pattern.count - 1; } else if(escaped) { escaped = false; pos += pattern.count - 1; } //}}} //{{{ Handle start of rule else if(!end) { if(context.inRule != null) handleRule(context.inRule,true); markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS) != ParserRule.MARK_PREVIOUS); switch(checkRule.action & ParserRule.MAJOR_ACTIONS) { //{{{ SEQ case ParserRule.SEQ: context.spanEndSubst = null; if((checkRule.action & ParserRule.REGEXP) != 0) { handleTokenWithSpaces(tokenHandler, checkRule.token, pos - line.offset, matchedChars, context); } else { tokenHandler.handleToken(line, checkRule.token, pos - line.offset, matchedChars,context); } // a DELEGATE attribute on a SEQ changes the // ruleset from the end of the SEQ onwards if(checkRule.delegate != null) { context = new LineContext( checkRule.delegate, context.parent); keywords = context.rules.getKeywords(); } break; //}}} //{{{ SPAN, EOL_SPAN case ParserRule.SPAN: case ParserRule.EOL_SPAN: context.inRule = checkRule; byte tokenType = ((checkRule.action & ParserRule.EXCLUDE_MATCH) == ParserRule.EXCLUDE_MATCH ? context.rules.getDefault() : checkRule.token); if((checkRule.action & ParserRule.REGEXP) != 0) { handleTokenWithSpaces(tokenHandler, tokenType, pos - line.offset, matchedChars, context); } else { tokenHandler.handleToken(line,tokenType, pos - line.offset, matchedChars,context); } char[] spanEndSubst = null; /* substitute result of matching the rule start * into the end string. * * eg, in shell script mode, <<\s*(\w+) is * matched into \<$1\> to construct rules for * highlighting read-ins like this < |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.