alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (TokenList.java)

This example Java source code file (TokenList.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

arraylist, beg0, charsequence, collection, empty, end0, hint, listiterator, modc, object, string, stringbuilder, tokenlist, unsupportedoperationexception, util

The TokenList.java Java example source code

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package xmlkit; // -*- mode: java; indent-tabs-mode: nil -*-

import java.util.*;

/**
 * A List of Strings each representing a word or token.
 * This object itself is a CharSequence whose characters consist
 * of all the tokens, joined by the separator string (a single blank
 * unless another separator is supplied).
 *
 * <p>A list can be {@linkplain #freeze() frozen}; afterwards every mutator
 * overridden by this class throws {@link UnsupportedOperationException}.
 * Mutators that this class does not override (for example ones added by
 * later JDK releases) are not guarded.
 *
 * @author jrose
 */
public class TokenList extends ArrayList<String> implements CharSequence {

    /** String placed between adjacent tokens in the character view. */
    protected String separator;
    /** Set by {@link #freeze()}; once true the guarded mutators throw. */
    protected boolean frozen;

    /** Creates an empty list whose separator is a single blank. */
    public TokenList() {
        this.separator = " ";
    }

    /**
     * Creates a list holding {@code toString()} of every element of
     * {@code tokens}, separated by a single blank.
     */
    public TokenList(Collection<? extends Object> tokens) {
        super(tokens.size());
        this.separator = " ";
        addTokens(tokens);
    }

    /**
     * Creates a list holding {@code toString()} of every element of
     * {@code tokens}, separated by {@code separator}.
     */
    public TokenList(Collection<? extends Object> tokens, String separator) {
        super(tokens.size());
        this.separator = separator;
        addTokens(tokens);
    }

    /** Creates a blank-separated list from all elements of {@code tokens}. */
    public TokenList(Object[] tokens) {
        super(tokens.length);
        this.separator = " ";
        addTokens(tokens, 0, tokens.length);
    }

    /** Creates a blank-separated list from {@code tokens[beg..end)}. */
    public TokenList(Object[] tokens, int beg, int end) {
        super(end - beg);  // capacity
        this.separator = " ";
        addTokens(tokens, beg, end);
    }

    /** Creates a list from {@code tokens[beg..end)} using {@code separator}. */
    public TokenList(Object[] tokens, int beg, int end, String separator) {
        super(end - beg);  // capacity
        this.separator = separator;
        addTokens(tokens, beg, end);
    }

    /** Splits {@code tokenStr} at blanks, dropping empty tokens. */
    public TokenList(String tokenStr) {
        this(tokenStr, " ", false);
    }

    /** Splits {@code tokenStr} at {@code separator}, keeping empty tokens. */
    public TokenList(String tokenStr, String separator) {
        this(tokenStr, separator, true);
    }

    /**
     * Splits {@code tokenStr} at {@code separator}.
     *
     * @param allowNulls whether empty tokens are kept
     * @see #addTokens(String, boolean)
     */
    public TokenList(String tokenStr, String separator, boolean allowNulls) {
        super(tokenStr.length() / 5);  // initial capacity guess
        this.separator = separator;
        addTokens(tokenStr, allowNulls);
    }

    /** A shared, frozen, empty list. */
    public static final TokenList EMPTY;

    static {
        TokenList tl = new TokenList(new Object[0]);
        tl.freeze();
        EMPTY = tl;
    }

    /**
     * Makes this list read-only with respect to the guarded mutators.
     * Each element is replaced by its {@code toString()} and the backing
     * storage is trimmed.  Calling this on a frozen list does nothing.
     */
    public void freeze() {
        if (!frozen) {
            for (ListIterator<String> i = listIterator(); i.hasNext();) {
                i.set(i.next().toString());
            }
            trimToSize();
            frozen = true;
        }
    }

    /** Returns true if {@link #freeze()} has been called on this list. */
    public boolean isFrozen() {
        return frozen;
    }

    /** @throws UnsupportedOperationException if this list is frozen */
    void checkNotFrozen() {
        if (isFrozen()) {
            throw new UnsupportedOperationException("cannot modify frozen TokenList");
        }
    }

    /** Returns the string placed between adjacent tokens. */
    public String getSeparator() {
        return separator;
    }

    /**
     * Changes the string placed between adjacent tokens.
     *
     * @throws UnsupportedOperationException if this list is frozen
     */
    public void setSeparator(String separator) {
        checkNotFrozen();
        this.separator = separator;
        // The cached offsets embed the old separator length and modCount
        // has not changed, so the cache must be dropped by hand.
        this.lengths = null;
    }

    /// All normal List mutators must check the frozen bit:
    @Override
    public String set(int index, String o) {
        checkNotFrozen();
        String old = super.set(index, o);
        // Replacing an element is not a structural modification, so
        // modCount alone would not invalidate the cached offsets.
        this.lengths = null;
        return old;
    }

    @Override
    public boolean add(String o) {
        checkNotFrozen();
        return super.add(o);
    }

    @Override
    public void add(int index, String o) {
        checkNotFrozen();
        super.add(index, o);
    }

    @Override
    public boolean addAll(Collection<? extends String> c) {
        checkNotFrozen();
        return super.addAll(c);
    }

    @Override
    public boolean addAll(int index, Collection<? extends String> c) {
        checkNotFrozen();
        return super.addAll(index, c);
    }

    @Override
    public boolean remove(Object o) {
        checkNotFrozen();
        return super.remove(o);
    }

    @Override
    public String remove(int index) {
        checkNotFrozen();
        return super.remove(index);
    }

    @Override
    public boolean removeAll(Collection<?> c) {
        checkNotFrozen();
        return super.removeAll(c);
    }

    @Override
    public boolean retainAll(Collection<?> c) {
        checkNotFrozen();
        return super.retainAll(c);
    }

    @Override
    protected void removeRange(int fromIndex, int toIndex) {
        checkNotFrozen();
        super.removeRange(fromIndex, toIndex);
    }

    @Override
    public void clear() {
        checkNotFrozen();
        super.clear();
    }

    /**
     * Add a collection of tokens to the list, applying toString to each.
     *
     * @return true if at least one token was added
     */
    public boolean addTokens(Collection<? extends Object> tokens) {
        // Note that if this sequence is empty, no tokens are added.
        // This is different from adding a null string, which is
        // a single token.
        boolean added = false;
        for (Object token : tokens) {
            add(token.toString());
            added = true;
        }
        return added;
    }

    /**
     * Adds {@code toString()} of each element of {@code tokens[beg..end)}.
     *
     * @return true if at least one token was added
     */
    public boolean addTokens(Object[] tokens, int beg, int end) {
        boolean added = false;
        for (int i = beg; i < end; i++) {
            add(tokens[i].toString());
            added = true;
        }
        return added;
    }

    /**
     * Splits {@code tokenStr} at the current separator and appends the
     * non-empty pieces.
     *
     * @return true if at least one token was added
     */
    public boolean addTokens(String tokenStr) {
        return addTokens(tokenStr, false);
    }

    /**
     * Splits {@code tokenStr} at the current separator and appends the
     * pieces.  If the separator is the empty string no split point exists,
     * so a non-empty {@code tokenStr} is appended as one token.
     *
     * @param tokenStr   text to split
     * @param allowNulls if true, empty pieces (between adjacent separators,
     *                   before a leading separator, or after a trailing one)
     *                   are appended as empty tokens; if false they are skipped
     * @return true if at least one token was added
     */
    public boolean addTokens(String tokenStr, boolean allowNulls) {
        int limit = tokenStr.length();
        int sepLength = separator.length();
        if (sepLength == 0) {
            // An empty separator matches at every position without
            // advancing, which would make the scan below loop forever.
            if (limit == 0) {
                return false;
            }
            add(tokenStr);
            return true;
        }
        boolean added = false;
        int pos = 0;
        int sep = limit;
        while (pos < limit) {
            sep = tokenStr.indexOf(separator, pos);
            if (sep < 0) {
                sep = limit;
            }
            if (sep == pos) {
                if (allowNulls) {
                    add("");
                    added = true;
                }
                pos += sepLength;
            } else {
                add(tokenStr.substring(pos, sep));
                added = true;
                pos = sep + sepLength;
            }
        }
        if (allowNulls && sep < limit) {
            // Input was something like "tok1 tok2 ".
            add("");
            added = true;
        }
        return added;
    }

    /** Appends {@code token.toString()} as a single token. */
    public boolean addToken(Object token) {
        return add(token.toString());
    }

    /** Format the token string, using quotes and escapes.
     *  Quotes must contain an odd number of 3 or more elements,
     *  a sequence of begin/end quote pairs, plus a superquote.
     *  For each token, the first begin/end pair is used for
     *  which the end quote does not occur in the token.
     *  If the token contains all end quotes, the last pair
     *  is used, with all occurrences of the end quote replaced
     *  by the superquote.  If an end quote is the empty string,
     *  the separator is used instead.
     *
     *  <p>NOTE: not implemented; this method currently ignores its
     *  arguments and always returns the empty string.
     */
    public String format(String separator, String[] quotes) {
        return ""; //@@
    }

    /** Cached offset table; see the layout note below.  Null when stale. */
    protected int[] lengths;
    protected static final int MODC = 0, HINT = 1, BEG0 = 2, END0 = 3;

    // Layout of lengths:
    //   { modCount, hint, -1==beg[0], end[0]==beg[1], ..., length }
    // Note that each beg[i]..end[i] span includes a leading separator,
    // which is not part of the corresponding token.
    // (beg[0] is really -separator.length(), which is -1 for the default.)

    /** Returns token {@code i} viewed as a CharSequence. */
    protected final CharSequence getCS(int i) {
        return (CharSequence) get(i);
    }

    /**
     * Produce (and cache) a table of offsets for each token.
     * The table is rebuilt when it is missing or when modCount differs
     * from the value recorded in it.
     */
    protected int[] getLengths() {
        int[] lengths = this.lengths;
        int sepLength = separator.length();
        if (lengths == null || lengths[MODC] != modCount) {
            int size = this.size();
            // An empty list still needs one slot to hold the total length (0).
            lengths = new int[END0 + size + (size == 0 ? 1 : 0)];
            lengths[MODC] = modCount;
            int end = -sepLength;  // cancels leading separator
            lengths[BEG0] = end;
            for (int i = 0; i < size; i++) {
                end += sepLength;  // count leading separator
                end += getCS(i).length();
                lengths[END0 + i] = end;
            }
            this.lengths = lengths;
        }
        return lengths;
    }

    /** Returns the number of characters in the joined token string. */
    @Override
    public int length() {
        int[] lengths = getLengths();
        return lengths[lengths.length - 1];
    }

    /**
     * Which token does the given index belong to?
     * The search starts at the span found by the previous successful call
     * and restarts from the first span if that fails.
     *
     * @return the token position, -1 if {@code i} is negative, or
     *         {@code size()} if {@code i} lies past the last span
     */
    protected int which(int i) {
        if (i < 0) {
            return -1;
        }
        int[] lengths = getLengths();
        for (int hint = lengths[HINT];; hint = 0) {
            for (int wh = hint; wh < lengths.length - END0; wh++) {
                int beg = lengths[BEG0 + wh];
                int end = lengths[END0 + wh];
                if (i >= beg && i < end) {
                    lengths[HINT] = wh;
                    return wh;
                }
            }
            if (hint == 0) {
                return size();  // end of the line
            }
        }
    }

    /**
     * Returns the character at position {@code i} of the joined string.
     *
     * @throws IndexOutOfBoundsException if {@code i} is negative or not
     *         less than {@link #length()}
     */
    @Override
    public char charAt(int i) {
        int[] lengths = getLengths();
        if (i < 0 || i >= lengths[lengths.length - 1]) {
            // Without this check i == length() would fall into the
            // trailing table slot and yield a separator character.
            throw new StringIndexOutOfBoundsException(i);
        }
        int wh = which(i);
        int beg = lengths[BEG0 + wh];
        int j = i - beg;
        int sepLength = separator.length();
        if (j < sepLength) {
            // The span starts with the separator that precedes the token.
            return separator.charAt(j);
        }
        return getCS(wh).charAt(j - sepLength);
    }

    /**
     * Returns the characters {@code beg..end} (end exclusive) of the joined
     * string.  The result is an empty String, a slice of the separator, a
     * slice of one token, or a new TokenList with the same separator.
     *
     * @throws IndexOutOfBoundsException if {@code beg} is negative,
     *         {@code end} exceeds {@link #length()}, or {@code beg > end}
     */
    @Override
    public CharSequence subSequence(int beg, int end) {
        int[] lengths = getLengths();
        int length = lengths[lengths.length - 1];
        if (beg < 0 || end > length || beg > end) {
            throw new StringIndexOutOfBoundsException(
                    "begin " + beg + ", end " + end + ", length " + length);
        }
        if (beg == end) {
            return "";
        }
        int begWh = which(beg);
        int endWh = which(end);
        if (endWh == size() || end == lengths[BEG0 + endWh]) {
            // end sits exactly on a span boundary: it closes the previous span.
            --endWh;
        }
        int begBase = lengths[BEG0 + begWh];
        int endBase = lengths[BEG0 + endWh];
        int sepLength = separator.length();
        // begFrag: number of separator characters kept in front of the first token.
        int begFrag = 0;
        if ((beg - begBase) < sepLength) {
            begFrag = sepLength - (beg - begBase);
            beg += begFrag;
        }
        // endFrag: number of separator characters kept after the last token.
        int endFrag = 0;
        if ((end - endBase) < sepLength) {
            endFrag = (end - endBase);
            end = endBase;
            endBase = lengths[BEG0 + --endWh];
        }
        if (begFrag > 0 && (end + endFrag) - begBase <= sepLength) {
            // Special case:  Slice the separator.
            beg -= begFrag;
            end += endFrag;
            return separator.substring(beg - begBase, end - begBase);
        }
        if (begWh == endWh && (begFrag + endFrag) == 0) {
            // Special case:  Slice a single token.
            return getCS(begWh).subSequence(beg - begBase - sepLength,
                    end - endBase - sepLength);
        }
        Object[] subTokens = new Object[1 + (endWh - begWh) + 1];
        int fillp = 0;
        if (begFrag == sepLength) {
            // Insert a leading null token to force an initial separator.
            subTokens[fillp++] = "";
            begFrag = 0;
        }
        for (int wh = begWh; wh <= endWh; wh++) {
            CharSequence cs = getCS(wh);
            if (wh == begWh || wh == endWh) {
                // Slice it.
                int csBeg = (wh == begWh) ? (beg - begBase) - sepLength : 0;
                int csEnd = (wh == endWh) ? (end - endBase) - sepLength : cs.length();
                cs = cs.subSequence(csBeg, csEnd);
                if (begFrag > 0 && wh == begWh) {
                    cs = separator.substring(sepLength - begFrag) + cs;
                }
                if (endFrag > 0 && wh == endWh) {
                    cs = cs.toString() + separator.substring(0, endFrag);
                }
            }
            subTokens[fillp++] = cs;
        }
        return new TokenList(subTokens, 0, fillp, separator);
    }

    /** Returns the concatenation of all tokens,
     *  with intervening separator characters.
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(length());
        int size = this.size();
        for (int i = 0; i < size; i++) {
            if (i > 0) {
                buf.append(separator);
            }
            buf.append(get(i));
        }
        return buf.toString();
    }

    /*---- TESTING CODE ----
    public static void main(String[] av) {
    if (av.length == 0)  av = new String[]{"one", "2", "", "four"};
    TokenList ts = new TokenList();
    final String SEP = ", ";
    ts.setSeparator(SEP);
    for (int i = -1; i < av.length; i++) {
    if (i >= 0)  ts.addToken(av[i]);
    {
    TokenList tsCopy = new TokenList(ts.toString(), SEP);
    if (!tsCopy.equals(ts)) {
    tsCopy.setSeparator(")(");
    System.out.println("!= ("+tsCopy+")");
    }
    }
    {
    TokenList tsBar = new TokenList(ts, "|");
    tsBar.add(0, "[");
    tsBar.add("]");
    System.out.println(tsBar);
    }
    if (false) {
    int[] ls = ts.getLengths();
    System.out.println("ts: "+ts);
    System.out.print("ls: {");
    for (int j = 0; j < ls.length; j++)  System.out.print(" "+ls[j]);
    System.out.println(" }");
    }
    assert0(ts.size() == i+1);
    assert0(i < 0 || ts.get(i) == av[i]);
    String tss = ts.toString();
    int tslen = tss.length();
    assert0(ts.length() == tss.length());
    for (int n = 0; n < tslen; n++) {
    assert0(ts.charAt(n) == tss.charAt(n));
    }
    for (int j = 0; j < tslen; j++) {
    for (int k = tslen; k >= j; k--) {
    CharSequence sub = ts.subSequence(j, k);
    //System.out.println("|"+sub+"|");
    assert0(sub.toString().equals(tss.substring(j, k)));
    }
    }
    }
    }
    static void assert0(boolean z) {
    if (!z)  throw new RuntimeException("assert failed");
    }
    // ---- TESTING CODE ----*/
}

Other Java examples (source code examples)

Here is a short list of links related to this Java TokenList.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.