Java example - ReaderCharSequence.java

What this is

This file is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" ^TM.
The source code

/*
 *                 Sun Public License Notice
 *
 * The contents of this file are subject to the Sun Public License
 * Version 1.0 (the "License"). You may not use this file except in
 * compliance with the License. A copy of the License is available at
 * http://www.sun.com/
 *
 * The Original Code is NetBeans. The Initial Developer of the Original
 * Code is Sun Microsystems, Inc. Portions Copyright 1997-2003 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

package org.openide.compiler;

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import org.openide.ErrorManager;

/**
 * A fake CharSequence based on a Reader.
 * The only way to parse multiline regexps from a Reader without loading its
 * entire contents into a buffer up front.
 * <p>
 * Initially its length is optimistically large. Previously read contents are cached
 * in a ring buffer indexed by absolute position modulo the buffer capacity.
 * When a char is needed, it is retrieved if necessary from the stream.
 * The currently reported length is also adjusted to be the currently read size
 * plus some horizon value (Integer.MAX_VALUE may be used to set the horizon at
 * infinity for highest accuracy but poor performance for negative searches).
 * When EOF is hit, the length is corrected, and then any attempts to read
 * "past" the end of the file result in U+FFFF (no character).
 * You may prune prefixes of the buffer that you no longer need, to limit memory
 * consumption; in this case attempts to read from the pruned prefix also result
 * in U+FFFF.
 * <p>
 * An IOException raised by the underlying reader is reported to the
 * ErrorManager as informational and then treated as end of file.
 * @author Jesse Glick
 * @see <a href="http://developer.java.sun.com/developer/bugParade/bugs/4607121.html">Bug 4607121</a>
 */
final class ReaderCharSequence implements CharSequence {

    /** Returned for positions that hold no character (pruned prefix or past EOF). */
    private static final char NO_CHAR = '\uFFFF';

    /** associated stream */
    private final Reader r;
    /** ring buffer of characters that have been read; position i lives at i % buf.length */
    private char[] buf;
    /** current length (max index + 1) */
    private int size = 0;
    /** min index, if pruned */
    private int origin = 0;
    /** if not eof, reported length (size + some amount) */
    private int horizon;
    /** if true, have hit eof */
    private boolean eof = false;
    /** growth factor applied to the buffer capacity on reallocation */
    private final float loadFactor;

    /** Sanity checks on the internal state; active only when assertions are enabled. */
    private void invariants() {
        assert buf.length > 0;
        assert size >= 0;
        assert origin >= 0;
        assert origin <= size;
        assert horizon >= size;
        // The live window [origin, size) must always fit in the ring buffer.
        assert size - origin <= buf.length;
    }

    /**
     * Create a sequence from a reader.
     * You probably want this wrapped in a BufferedReader if you haven't already.
     * The initial buffer size is 4096 chars, the horizon is set to infinity,
     * and the load factor to 4/3.
     * @param r the reader to pull characters from
     */
    public ReaderCharSequence(Reader r) {
        this(r, 4096, Integer.MAX_VALUE, 4.0f / 3.0f);
    }

    /**
     * Create a sequence from a reader with finer control over performance.
     * @param r the reader to pull characters from
     * @param bufsize initial capacity of the character buffer; must be positive
     * @param horizon initially reported length while EOF has not been seen
     *                (Integer.MAX_VALUE for "infinity"); must not be negative
     * @param loadFactor factor by which the buffer capacity is multiplied when it
     *                   is full; the buffer always grows by at least one slot
     *                   even if the factor is too small to achieve that
     * @throws NullPointerException if r is null
     * @throws IllegalArgumentException if bufsize is not positive or horizon is negative
     */
    public ReaderCharSequence(Reader r, int bufsize, int horizon, float loadFactor) {
        if (r == null) {
            throw new NullPointerException("r");
        }
        if (bufsize <= 0) {
            // A zero-length buffer would make every "% buf.length" divide by zero.
            throw new IllegalArgumentException("bufsize must be positive: " + bufsize);
        }
        if (horizon < 0) {
            throw new IllegalArgumentException("horizon must not be negative: " + horizon);
        }
        this.r = r;
        this.buf = new char[bufsize];
        this.horizon = horizon;
        this.loadFactor = loadFactor;
    }

    /**
     * Additional method permitting you to prune an old section of the buffer
     * that will no longer be needed. After this is called, any attempts to
     * read from the interval [0,pos) will yield U+FFFF. Buffer reallocation will
     * only be needed if the current size minus the last prune position is greater
     * than the current buffer size. If the attempted prune position is greater than
     * the actual number of read characters (but less than the reported length), it
     * is quietly limited to the actual size. If the attempted prune position is
     * less than the last set prune position (initially zero, i.e. only for repeated
     * calls) then this method is a no-op (the interval is already pruned).
     * <p>Typical usage:
     * <pre>
     * ReaderCharSequence s = new ReaderCharSequence(reader);
     * Matcher m = pattern.matcher(s);
     * while (m.find()) {
     *     // as usual
     *     // save memory:
     *     s.prune(m.end());
     * }
     * </pre>
     * @param pos an index to prune before
     * @throws IndexOutOfBoundsException unless 0 <= pos <= length()
     */
    public void prune(int pos) throws IndexOutOfBoundsException {
        if (pos < 0 || pos > length()) {
            throw new IndexOutOfBoundsException("pos=" + pos + " length=" + length());
        }
        // Nothing beyond what has actually been read can be pruned.
        int target = Math.min(pos, size);
        if (target > origin) {
            origin = target;
        }
        invariants();
    }

    /**
     * Returns the character at the given position, reading from the stream as
     * far as necessary.
     * @param index absolute position in the sequence
     * @return the character, or U+FFFF if the position lies in the pruned
     *         prefix or beyond the end of the stream
     * @throws IndexOutOfBoundsException if index is negative or not below the horizon
     */
    public char charAt(int index) {
        if (index < 0 || index >= horizon) {
            throw new IndexOutOfBoundsException("index=" + index + " horizon=" + horizon);
        }
        if (index < origin) {
            return NO_CHAR;
        }
        // Pull characters until the requested one is buffered or the stream ends.
        while (index >= size && !eof) {
            readOneChar();
        }
        return (index < size) ? buf[index % buf.length] : NO_CHAR;
    }

    /**
     * Reads a single character from the stream into the buffer, growing the
     * buffer first if the live window already fills it. Sets the EOF flag when
     * the stream is exhausted or fails.
     */
    private void readOneChar() {
        assert !eof;
        int c;
        try {
            c = r.read();
        } catch (IOException e) {
            // Best effort: report the problem and behave as if the stream ended here.
            ErrorManager.getDefault().notify(ErrorManager.INFORMATIONAL, e);
            c = -1;
        }
        if (c == -1) {
            eof = true;
        } else {
            if (size - origin == buf.length) {
                realloc();
            } else {
                assert size - origin < buf.length;
            }
            buf[size++ % buf.length] = (char) c;
            // Keep the reported length a constant distance ahead, unless it is "infinite".
            if (horizon != Integer.MAX_VALUE) {
                horizon++;
            }
        }
        invariants();
    }

    /**
     * Replaces the buffer with a larger one, re-homing every live character
     * (positions in [origin,size)) at its position modulo the new capacity.
     */
    private void realloc() {
        int oldCap = buf.length;
        int newCap = (int) (oldCap * loadFactor);
        if (newCap <= oldCap) {
            // Tiny buffers or load factors <= 1 would otherwise not grow at all,
            // and the caller would overwrite a live character.
            newCap = oldCap + 1;
        }
        char[] nbuf = new char[newCap];
        // Copying buf[[origin,size) % oldCap] -> nbuf[[origin,size) % newCap].
        for (int i = origin; i < size; i++) {
            nbuf[i % newCap] = buf[i % oldCap];
        }
        buf = nbuf;
        invariants();
    }

    /**
     * Returns the currently reported length: the number of characters read once
     * EOF has been seen, otherwise the horizon.
     */
    public int length() {
        return eof ? size : horizon;
    }

    /**
     * Returns a lazy view of part of this sequence; nothing is read from the
     * stream until the view is queried.
     * @param start start position, inclusive
     * @param end end position, exclusive; it is not checked against the length
     *            and is quietly truncated to the real end once EOF is known
     * @throws IndexOutOfBoundsException if start is negative or greater than end
     */
    public CharSequence subSequence(int start, int end) {
        if (start < 0 || end < start) {
            throw new IndexOutOfBoundsException("start=" + start + " end=" + end);
        }
        return new SubCharSequence(start, end);
    }

    /**
     * Builds a string from already-read positions [start,end). Positions that
     * fall in the pruned prefix come out as U+FFFF.
     */
    private String substring(int start, int end) {
        assert start >= 0;
        assert start <= end;
        assert end <= size;
        int cap = buf.length;
        int len = end - start;
        int offset = start % cap;
        if (start >= origin && offset + len <= cap) {
            // Fully buffered and contiguous in the array (does not wrap around).
            return new String(buf, offset, len);
        }
        char[] out = new char[len];
        int from = start;
        if (start < origin) {
            // The pruned part may cover the whole requested range, so clamp it.
            from = Math.min(origin, end);
            Arrays.fill(out, 0, from - start, NO_CHAR);
        }
        for (int i = from; i < end; i++) {
            out[i - start] = buf[i % cap];
        }
        return new String(out);
    }

    /**
     * Reads the rest of the stream and returns everything as a string, with
     * U+FFFF standing in for any pruned prefix.
     */
    public String toString() {
        while (!eof) {
            readOneChar();
        }
        return substring(0, size);
    }

    /**
     * A lazy window [start,end) onto the enclosing sequence. The window shrinks
     * to the real end of the stream once EOF has been seen.
     */
    private final class SubCharSequence implements CharSequence {

        private final int start;
        private final int end;

        public SubCharSequence(int start, int end) {
            this.start = start;
            this.end = end;
        }

        public char charAt(int index) {
            if (index < 0 || index >= length()) {
                throw new IndexOutOfBoundsException("index=" + index + " length=" + length());
            }
            return ReaderCharSequence.this.charAt(start + index);
        }

        public int length() {
            if (eof) {
                if (start > size) {
                    return 0;
                }
                if (end > size) {
                    return size - start;
                }
            }
            return end - start;
        }

        /**
         * Returns a nested window, with both positions relative to this window's start.
         * @throws IndexOutOfBoundsException if start is negative or greater than end
         */
        public CharSequence subSequence(int start, int end) {
            if (start < 0 || end < start) {
                throw new IndexOutOfBoundsException("start=" + start + " end=" + end);
            }
            return new SubCharSequence(this.start + start, this.start + end);
        }

        /**
         * Reads from the stream up to the end of the window (or EOF) and returns
         * the covered characters as a string.
         */
        public String toString() {
            while (!eof && end > size) {
                readOneChar();
            }
            if (start > size) {
                return new String();
            }
            return substring(start, Math.min(end, size));
        }

    }

}
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.
What this is

Other links

The source code

new blog posts

... this post is sponsored by my books ...
#1 New Release!	FP Best Seller