|
What this is
Other links
The source code
/*
* TokenMarker.java - Tokenizes lines of text
* :tabSize=8:indentSize=8:noTabs=false:
* :folding=explicit:collapseFolds=1:
*
* Copyright (C) 1998, 2003 Slava Pestov
* Copyright (C) 1999, 2000 mike dillon
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package org.gjt.sp.jedit.syntax;
//{{{ Imports
import gnu.regexp.*;
import javax.swing.text.Segment;
import java.util.*;
import org.gjt.sp.jedit.*;
import org.gjt.sp.util.CharIndexedSegment;
import org.gjt.sp.util.Log;
//}}}
/**
* A token marker splits lines of text into tokens. Each token carries
* a length field and an identification tag that can be mapped to a color
* or font style for painting that token.
*
* @author Slava Pestov, mike dillon
* @version $Id: TokenMarker.java,v 1.62 2003/12/27 05:14:46 spestov Exp $
*
* @see org.gjt.sp.jedit.syntax.Token
* @see org.gjt.sp.jedit.syntax.TokenHandler
*/
public class TokenMarker
{
//{{{ TokenMarker constructor
/**
* Creates a token marker that has no rule sets registered yet.
*/
public TokenMarker()
{
	// initial capacity of 64 entries for the name -> rule set table
	this.ruleSets = new Hashtable(64);
} //}}}
//{{{ addRuleSet() method
/**
* Registers a rule set under the name it reports through getSetName().
* A rule set named "MAIN" additionally becomes the main rule set.
*
* @param rules the rule set to register
*/
public void addRuleSet(ParserRuleSet rules)
{
	String setName = rules.getSetName();
	ruleSets.put(setName, rules);

	if (!setName.equals("MAIN"))
		return;

	mainRuleSet = rules;
} //}}}
//{{{ getMainRuleSet() method
/**
* Returns the rule set that was registered under the name "MAIN".
*
* @return the main rule set, or null if none has been added
*/
public ParserRuleSet getMainRuleSet()
{
	return this.mainRuleSet;
} //}}}
//{{{ getRuleSet() method
/**
* Looks up a registered rule set by name.
*
* @param setName the name the rule set was registered under
* @return the matching rule set, or null if there is none
*/
public ParserRuleSet getRuleSet(String setName)
{
	Object registered = ruleSets.get(setName);
	return (ParserRuleSet) registered;
} //}}}
//{{{ getRuleSets() method
/**
* Returns all registered rule sets as a newly allocated array.
*
* @return an array holding every rule set added to this token marker
* @since jEdit 4.2pre3
*/
public ParserRuleSet[] getRuleSets()
{
	ParserRuleSet[] target = new ParserRuleSet[ruleSets.size()];
	// return what toArray() hands back rather than target itself, since
	// toArray() allocates a larger array when target is too small
	return (ParserRuleSet[])ruleSets.values().toArray(target);
} //}}}
//{{{ markTokens() method
/**
* Splits one line of text into tokens and reports them to a token handler.
* Do not call this method directly; call Buffer.markTokens() instead.
*
* The arguments and the parser state are kept in instance fields for the
* duration of the call, so one TokenMarker must not tokenize two lines at
* the same time.
*
* @param prevContext the context to resume from, or null to start in the
* main rule set (presumably the context returned for the preceding line;
* confirm against Buffer.markTokens())
* @param tokenHandler receives every token through handleToken() and the
* final context through setLineContext()
* @param line the text to tokenize; the characters from line.offset up to
* line.offset + line.count (exclusive) are scanned
* @return the interned line context in effect once the line has been
* processed; the same object is passed to tokenHandler.setLineContext()
*/
public LineContext markTokens(LineContext prevContext,
TokenHandler tokenHandler, Segment line)
{
//{{{ Set up some instance variables
// this is to avoid having to pass around lots and lots of
// parameters.
this.tokenHandler = tokenHandler;
this.line = line;
// lastOffset and pos are indices into line.array, so they start at
// line.offset rather than at 0; lineLength is the exclusive end index
lastOffset = line.offset;
lineLength = line.count + line.offset;
// always work on a fresh context object: with no previous context
// parsing starts in the main rule set, otherwise the state of
// prevContext is copied over field by field
context = new LineContext();
if(prevContext == null)
context.rules = getMainRuleSet();
else
{
context.parent = prevContext.parent;
context.inRule = prevContext.inRule;
context.rules = prevContext.rules;
context.spanEndSubst = prevContext.spanEndSubst;
}
keywords = context.rules.getKeywords();
escaped = false;
// whitespaceEnd is pushed to the right for as long as only whitespace
// has been scanned; seenWhitespaceEnd freezes it afterwards
seenWhitespaceEnd = false;
whitespaceEnd = line.offset;
//}}}
//{{{ Main parser loop
ParserRule rule;
// column, counted from the start of the line, from which the rule set
// stops being applied; a negative value disables the check below
int terminateChar = context.rules.getTerminateChar();
boolean terminated = false;
main_loop: for(pos = line.offset; pos < lineLength; pos++)
{
//{{{ check if we have to stop parsing
// done at most once per line: from here on the standard rule set for
// the default token type is used, in a new context created on top of
// the current one (the unwind step below pops it again through
// context.parent)
if(terminateChar >= 0 && pos - line.offset >= terminateChar
&& !terminated)
{
terminated = true;
context = new LineContext(ParserRuleSet
.getStandardRuleSet(context.rules
.getDefault()),context);
keywords = context.rules.getKeywords();
} //}}}
//{{{ check for end of delegate
// when the parent context is inside a rule, that rule's end sequence
// takes priority over the rules of the current rule set
if(context.parent != null)
{
rule = context.parent.inRule;
if(rule != null)
{
if(checkDelegateEnd(rule))
{
seenWhitespaceEnd = true;
continue main_loop;
}
}
} //}}}
//{{{ check every rule
// getRules(ch) yields the first candidate rule for this character;
// further candidates are chained through rule.next
char ch = line.array[pos];
rule = context.rules.getRules(ch);
while(rule != null)
{
// stop checking rules if there was a match
if (handleRule(rule,false))
{
seenWhitespaceEnd = true;
continue main_loop;
}
rule = rule.next;
} //}}}
//{{{ check if current character is a word separator
if(Character.isWhitespace(ch))
{
// still inside the leading whitespace of the line
if(!seenWhitespaceEnd)
whitespaceEnd = pos + 1;
// give the rule in progress a chance to end here, then flush the
// word collected so far (handleNoWordBreak() and markKeyword() are
// defined outside this excerpt)
if(context.inRule != null)
handleRule(context.inRule,true);
handleNoWordBreak();
markKeyword(false);
// any text between lastOffset and pos that is still pending is
// reported with the default token type
if(lastOffset != pos)
{
tokenHandler.handleToken(line,
context.rules.getDefault(),
lastOffset - line.offset,
pos - lastOffset,
context);
}
// the whitespace character itself becomes a separate token of
// length 1
tokenHandler.handleToken(line,
context.rules.getDefault(),
pos - line.offset,1,context);
lastOffset = pos + 1;
escaped = false;
}
else
{
// word boundaries only matter when the rule set has keywords or rules
if(keywords != null || context.rules.getRuleCount() != 0)
{
String noWordSep = context.rules.getNoWordSep();
// a character that is neither a letter or digit nor listed in
// noWordSep separates words
if(!Character.isLetterOrDigit(ch)
&& noWordSep.indexOf(ch) == -1)
{
if(context.inRule != null)
handleRule(context.inRule,true);
handleNoWordBreak();
markKeyword(true);
// one-character token with the default type, starting at
// lastOffset
// NOTE(review): this presumably relies on markKeyword(true)
// having advanced lastOffset to pos -- confirm in markKeyword()
tokenHandler.handleToken(line,
context.rules.getDefault(),
lastOffset - line.offset,1,
context);
lastOffset = pos + 1;
}
}
seenWhitespaceEnd = true;
escaped = false;
} //}}}
} //}}}
//{{{ Mark all remaining characters
// same flush sequence as at a word separator, with pos placed at the
// end of the line
pos = lineLength;
if(context.inRule != null)
handleRule(context.inRule,true);
handleNoWordBreak();
markKeyword(true);
//}}}
//{{{ Unwind any NO_LINE_BREAK parent delegates
// pop contexts for as long as the parent's rule carries NO_LINE_BREAK;
// if the line was cut off at terminateChar every parent is popped.
// The rule in progress is cleared on each context that is popped to.
unwind: while(context.parent != null)
{
rule = context.parent.inRule;
if((rule != null && (rule.action
& ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
|| terminated)
{
context = context.parent;
keywords = context.rules.getKeywords();
context.inRule = null;
}
else
break unwind;
} //}}}
// zero-length END token at the end of the line
tokenHandler.handleToken(line,Token.END,
pos - line.offset,0,context);
// the interned context is what the handler and the caller receive
context = context.intern();
tokenHandler.setLineContext(context);
return context;
} //}}}
//{{{ Private members
//{{{ Instance variables
// rule sets added through addRuleSet(), keyed by their set name
private Hashtable ruleSets;
// the rule set whose name is "MAIN"; null until such a set is added
private ParserRuleSet mainRuleSet;
// Instead of passing these around to each method, we just store them
// as instance variables. Note that this is not thread-safe.
// receiver of the tokens produced for the current line
private TokenHandler tokenHandler;
// the line currently being tokenized
private Segment line;
// current parser context (rule set, rule in progress, parent context)
private LineContext context;
// keyword map of context.rules; refreshed whenever context changes
private KeywordMap keywords;
// scratch segment holding the start or end sequence that handleRule()
// is comparing against the line
private Segment pattern = new Segment();
// index into line.array of the first character not yet reported as a token
private int lastOffset;
// exclusive end index of the line in line.array (line.offset + line.count)
private int lineLength;
// index into line.array of the character being examined
private int pos;
// toggled by handleRule() when an escape rule matches; reset to false
// after each ordinary character
private boolean escaped;
// index into line.array just past the leading whitespace of the line
private int whitespaceEnd;
// true once whitespaceEnd must no longer be moved
private boolean seenWhitespaceEnd;
//}}}
//{{{ checkDelegateEnd() method
/**
* Checks whether the rule that the parent context is in ends at the
* current position and, if it does, switches from the delegated rule set
* back to a copy of the parent context.
*
* @param rule the rule whose end sequence is tested; markTokens() passes
* context.parent.inRule
* @return true if the current position was consumed, either by the end
* sequence of the rule or by the escape rule of the parent rule set
*/
private boolean checkDelegateEnd(ParserRule rule)
{
// a rule without an end sequence cannot end here
if(rule.end == null)
return false;
// handleRule() reads the ignore-case setting and spanEndSubst from the
// current context, so the parent is made current for the duration of
// the check and the delegate context is restored afterwards
LineContext tempContext = context;
context = context.parent;
keywords = context.rules.getKeywords();
// remember the escape state from before the call; handleRule() may
// change the escaped field
boolean tempEscaped = escaped;
boolean b = handleRule(rule,true);
context = tempContext;
keywords = context.rules.getKeywords();
// the end sequence only counts if it was not escaped
if(b && !tempEscaped)
{
// flush the rule in progress and the pending word of the delegate
if(context.inRule != null)
handleRule(context.inRule,true);
markKeyword(true);
// continue in a clone of the parent, so that clearing inRule below
// does not modify the original parent object
context = (LineContext)context.parent.clone();
// the end sequence gets the default token type when the rule has
// EXCLUDE_MATCH set and the rule's own token type otherwise; its
// length is that of the sequence handleRule() left in pattern
tokenHandler.handleToken(line,
(context.inRule.action & ParserRule.EXCLUDE_MATCH)
== ParserRule.EXCLUDE_MATCH
? context.rules.getDefault()
: context.inRule.token,
pos - line.offset,pattern.count,context);
keywords = context.rules.getKeywords();
context.inRule = null;
lastOffset = pos + pattern.count;
// move pos to last character of match sequence
pos += (pattern.count - 1);
return true;
}
// check escape rule of parent
// skipped when the rule has the NO_ESCAPE action bit set
if((rule.action & ParserRule.NO_ESCAPE) == 0)
{
ParserRule escape = context.parent.rules.getEscapeRule();
if(escape != null && handleRule(escape,false))
return true;
}
return false;
} //}}}
//{{{ handleRule() method
/**
* Checks if the rule matches the line at the current position
* and handles the rule if it does match
*/
private boolean handleRule(ParserRule checkRule, boolean end)
{
//{{{ Some rules can only match in certain locations
if(!end)
{
if(Character.toUpperCase(checkRule.hashChar)
!= Character.toUpperCase(line.array[pos]))
{
return false;
}
}
int offset = ((checkRule.action & ParserRule.MARK_PREVIOUS) != 0) ?
lastOffset : pos;
int posMatch = (end ? checkRule.endPosMatch : checkRule.startPosMatch);
if((posMatch & ParserRule.AT_LINE_START)
== ParserRule.AT_LINE_START)
{
if(offset != line.offset)
return false;
}
else if((posMatch & ParserRule.AT_WHITESPACE_END)
== ParserRule.AT_WHITESPACE_END)
{
if(offset != whitespaceEnd)
return false;
}
else if((posMatch & ParserRule.AT_WORD_START)
== ParserRule.AT_WORD_START)
{
if(offset != lastOffset)
return false;
} //}}}
int matchedChars = 1;
CharIndexedSegment charIndexed = null;
REMatch match = null;
//{{{ See if the rule's start or end sequence matches here
if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
{
// the end cannot be a regular expression
if((checkRule.action & ParserRule.REGEXP) == 0 || end)
{
if(end)
{
if(context.spanEndSubst != null)
pattern.array = context.spanEndSubst;
else
pattern.array = checkRule.end;
}
else
pattern.array = checkRule.start;
pattern.offset = 0;
pattern.count = pattern.array.length;
matchedChars = pattern.count;
if(!SyntaxUtilities.regionMatches(context.rules
.getIgnoreCase(),line,pos,pattern.array))
{
return false;
}
}
else
{
// note that all regexps start with \A so they only
// match the start of the string
int matchStart = pos - line.offset;
charIndexed = new CharIndexedSegment(line,matchStart);
match = checkRule.startRegexp.getMatch(
charIndexed,0,RE.REG_ANCHORINDEX);
if(match == null)
return false;
else if(match.getStartIndex() != 0)
throw new InternalError("Can't happen");
else
{
matchedChars = match.getEndIndex();
/* workaround for hang if match was
* zero-width. not sure if there is
* a better way to handle this */
if(matchedChars == 0)
matchedChars = 1;
}
}
} //}}}
//{{{ Check for an escape sequence
if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
{
if(context.inRule != null)
handleRule(context.inRule,true);
escaped = !escaped;
pos += pattern.count - 1;
}
else if(escaped)
{
escaped = false;
pos += pattern.count - 1;
} //}}}
//{{{ Handle start of rule
else if(!end)
{
if(context.inRule != null)
handleRule(context.inRule,true);
markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS)
!= ParserRule.MARK_PREVIOUS);
switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
{
//{{{ SEQ
case ParserRule.SEQ:
context.spanEndSubst = null;
if((checkRule.action & ParserRule.REGEXP) != 0)
{
handleTokenWithSpaces(tokenHandler,
checkRule.token,
pos - line.offset,
matchedChars,
context);
}
else
{
tokenHandler.handleToken(line,
checkRule.token,
pos - line.offset,
matchedChars,context);
}
// a DELEGATE attribute on a SEQ changes the
// ruleset from the end of the SEQ onwards
if(checkRule.delegate != null)
{
context = new LineContext(
checkRule.delegate,
context.parent);
keywords = context.rules.getKeywords();
}
break;
//}}}
//{{{ SPAN, EOL_SPAN
case ParserRule.SPAN:
case ParserRule.EOL_SPAN:
context.inRule = checkRule;
byte tokenType = ((checkRule.action & ParserRule.EXCLUDE_MATCH)
== ParserRule.EXCLUDE_MATCH
? context.rules.getDefault() : checkRule.token);
if((checkRule.action & ParserRule.REGEXP) != 0)
{
handleTokenWithSpaces(tokenHandler,
tokenType,
pos - line.offset,
matchedChars,
context);
}
else
{
tokenHandler.handleToken(line,tokenType,
pos - line.offset,
matchedChars,context);
}
char[] spanEndSubst = null;
/* substitute result of matching the rule start
* into the end string.
*
* eg, in shell script mode, <<\s*(\w+) is
* matched into \<$1\> to construct rules for
* highlighting read-ins like this < |
| ... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.