alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Other links

The source code

/*
 *  gnu/regexp/RETokenRepeated.java
 *  Copyright (C) 1998-2001 Wes Biggs
 *
 *  This library is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published
 *  by the Free Software Foundation; either version 2.1 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package gnu.regexp;
import java.util.Vector;

final class RETokenRepeated extends REToken {
    private REToken token;
    private int min,max;
    private boolean stingy;
    
    RETokenRepeated(int subIndex, REToken token, int min, int max) {
	super(subIndex);
	this.token = token;
	this.min = min;
	this.max = max;
    }

    /** Sets the minimal matching mode to true. */
    void makeStingy() {
	stingy = true;
    }
    
    /** Queries if this token has minimal matching enabled. */
    boolean isStingy() {
	return stingy;
    }
    
    /**
     * The minimum length of a repeated token is the minimum length
     * of the token multiplied by the minimum number of times it must
     * match.
     */
    int getMinimumLength() {
	return (min * token.getMinimumLength());
    }

    // We do need to save every possible point, but the number of clone()
    // invocations here is really a killer for performance on non-stingy
    // repeat operators.  I'm open to suggestions...

    // Hypothetical question: can you have a RE that matches 1 times,
    // 3 times, 5 times, but not 2 times or 4 times?  Does having
    // the subexpression back-reference operator allow that?

    boolean match(CharIndexed input, REMatch mymatch) {
	// number of times we've matched so far
	int numRepeats = 0; 
	
	// Possible positions for the next repeat to match at
	REMatch newMatch = mymatch;
	REMatch last = null;
	REMatch current;

	// Add the '0-repeats' index
	// positions.elementAt(z) == position [] in input after <> matches
	Vector positions = new Vector();
	positions.addElement(newMatch);
	
	// Declare variables used in loop
	REMatch doables;
	REMatch doablesLast;
	REMatch recurrent;

	do {
	    // Check for stingy match for each possibility.
	    if (stingy && (numRepeats >= min)) {
		REMatch result = matchRest(input, newMatch);
		if (result != null) {
		    mymatch.assignFrom(result);
		    return true;
		}
	    }

	    doables = null;
	    doablesLast = null;

	    // try next repeat at all possible positions
	    for (current = newMatch; current != null; current = current.next) {
		recurrent = (REMatch) current.clone();
		if (token.match(input, recurrent)) {
		    // add all items in current to doables array
		    if (doables == null) {
			doables = recurrent;
			doablesLast = recurrent;
		    } else {
			// Order these from longest to shortest
			// Start by assuming longest (more repeats)
			doablesLast.next = recurrent;
		    }
		    // Find new doablesLast
		    while (doablesLast.next != null) {
			doablesLast = doablesLast.next;
		    }
		}
	    }
	    // if none of the possibilities worked out, break out of do/while
	    if (doables == null) break;
	    
	    // reassign where the next repeat can match
	    newMatch = doables;
	    
	    // increment how many repeats we've successfully found
	    ++numRepeats;
	    
	    positions.addElement(newMatch);
	} while (numRepeats < max);
	
	// If there aren't enough repeats, then fail
	if (numRepeats < min) return false;
	
	// We're greedy, but ease off until a true match is found 
	int posIndex = positions.size();
	
	// At this point we've either got too many or just the right amount.
	// See if this numRepeats works with the rest of the regexp.
	REMatch allResults = null;
	REMatch allResultsLast = null;

	REMatch results = null;
	while (--posIndex >= min) {
	    newMatch = (REMatch) positions.elementAt(posIndex);
	    results = matchRest(input, newMatch);
	    if (results != null) {
		if (allResults == null) {
		    allResults = results;
		    allResultsLast = results;
		} else {
		    // Order these from longest to shortest
		    // Start by assuming longest (more repeats)
		    allResultsLast.next = results;
		}
		// Find new doablesLast
		while (allResultsLast.next != null) {
		    allResultsLast = allResultsLast.next;
		}
	    }
	    // else did not match rest of the tokens, try again on smaller sample
	}
	if (allResults != null) {
	    mymatch.assignFrom(allResults); // does this get all?
	    return true;
	}
	// If we fall out, no matches.
	return false;
    }

    private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
	REMatch current, single;
	REMatch doneIndex = null;
	REMatch doneIndexLast = null;
	// Test all possible matches for this number of repeats
	for (current = newMatch; current != null; current = current.next) {
	    // clone() separates a single match from the chain
	    single = (REMatch) current.clone();
	    if (next(input, single)) {
		// chain results to doneIndex
		if (doneIndex == null) {
		    doneIndex = single;
		    doneIndexLast = single;
		} else {
		    doneIndexLast.next = single;
		}
		// Find new doneIndexLast
		while (doneIndexLast.next != null) {
		    doneIndexLast = doneIndexLast.next;
		}
	    }
	}
	return doneIndex;
    }

    void dump(StringBuffer os) {
	os.append("(?:");
	token.dumpAll(os);
	os.append(')');
	if ((max == Integer.MAX_VALUE) && (min <= 1))
	    os.append( (min == 0) ? '*' : '+' );
	else if ((min == 0) && (max == 1))
	    os.append('?');
	else {
	    os.append('{').append(min);
	    if (max > min) {
		os.append(',');
		if (max != Integer.MAX_VALUE) os.append(max);
	    }
	    os.append('}');
	}
	if (stingy) os.append('?');
    }
}
... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.