alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Commons IO example source code file (BOMInputStream.java)

This example Commons IO source code file (BOMInputStream.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Java - Commons IO tags/keywords

bominputstream, bominputstream, boms, byteordermark, byteordermark, illegalargumentexception, io, ioexception, ioexception, list, no, override, override, stream, string, util

The Commons IO BOMInputStream.java source code

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.ByteOrderMark;

/**
 * This class is used to wrap a stream that includes an encoded
 * {@link ByteOrderMark} as its first bytes.
 *
 * This class detects these bytes and, if required, can automatically skip them
 * and return the subsequent byte as the first byte in the stream.
 *
 * The {@link ByteOrderMark} implementation has the following pre-defined BOMs:
 * <ul>
 *   <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li>
 *   <li>UTF-16BE - {@link ByteOrderMark#UTF_16BE}</li>
 *   <li>UTF-16LE - {@link ByteOrderMark#UTF_16LE}</li>
 * </ul>
 *
 * <p>
 * Detection is incremental: after each byte is read, the configured BOMs are
 * checked (in the order they were supplied) for one whose length equals the
 * number of bytes read so far and whose bytes all match. Consequently, if one
 * configured BOM is a strict prefix of another, the shorter one is always the
 * one reported.
 * </p>
 *
 * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3>
 * <pre>
 *      BOMInputStream bomIn = new BOMInputStream(in);
 *      if (bomIn.hasBOM()) {
 *          // has a UTF-8 BOM
 *      }
 * </pre>
 *
 * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3>
 * <pre>
 *      boolean include = true;
 *      BOMInputStream bomIn = new BOMInputStream(in, include);
 *      if (bomIn.hasBOM()) {
 *          // has a UTF-8 BOM
 *      }
 * </pre>
 *
 * <h3>Example 3 - Detect Multiple BOMs</h3>
 * <pre>
 *      BOMInputStream bomIn = new BOMInputStream(in, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
 *      if (bomIn.hasBOM() == false) {
 *          // No BOM found
 *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
 *          // has a UTF-16LE BOM
 *      } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
 *          // has a UTF-16BE BOM
 *      }
 * </pre>
 *
 * @see org.apache.commons.io.ByteOrderMark
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a>
 * @version $Revision: 1005099 $ $Date: 2010-10-06 17:13:01 +0100 (Wed, 06 Oct 2010) $
 * @since Commons IO 2.0
 */
public class BOMInputStream extends ProxyInputStream {
    /** Whether a detected BOM is handed back to the reader (true) or dropped (false). */
    private final boolean include;
    /** The BOMs this stream looks for, in the order given to the constructor. */
    private final List<ByteOrderMark> boms;
    /** The BOM found by {@link #getBOM()}, or null if none was found (or detection has not run). */
    private ByteOrderMark byteOrderMark;
    /** Bytes consumed from the delegate during detection; null until detection has run. */
    private int[] firstBytes;
    /** Number of entries of {@link #firstBytes} that are to be replayed to the reader. */
    private int fbLength;
    /** Index of the next entry of {@link #firstBytes} to replay. */
    private int fbIndex;
    /** Value of {@link #fbIndex} captured by the last {@link #mark(int)}. */
    private int markFbIndex;
    /** True if the last {@link #mark(int)} happened before detection had run. */
    private boolean markedAtStart;

    /**
     * Constructs a new BOM InputStream that excludes
     * a {@link ByteOrderMark#UTF_8} BOM.
     * @param delegate the InputStream to delegate to
     */
    public BOMInputStream(InputStream delegate) {
        this(delegate, false, ByteOrderMark.UTF_8);
    }

    /**
     * Constructs a new BOM InputStream that detects
     * a {@link ByteOrderMark#UTF_8} and optionally includes it.
     * @param delegate the InputStream to delegate to
     * @param include true to include the UTF-8 BOM or
     * false to exclude it
     */
    public BOMInputStream(InputStream delegate, boolean include) {
        this(delegate, include, ByteOrderMark.UTF_8);
    }

    /**
     * Constructs a new BOM InputStream that excludes
     * the specified BOMs.
     * @param delegate the InputStream to delegate to
     * @param boms The BOMs to detect and exclude
     */
    public BOMInputStream(InputStream delegate, ByteOrderMark... boms) {
        this(delegate, false, boms);
    }

    /**
     * Constructs a new BOM InputStream that detects the
     * specified BOMs and optionally includes them.
     * @param delegate the InputStream to delegate to
     * @param include true to include the specified BOMs or
     * false to exclude them
     * @param boms The BOMs to detect and optionally exclude
     * @throws IllegalArgumentException if {@code boms} is null or empty
     */
    public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) {
        super(delegate);
        if (boms == null || boms.length == 0) {
            throw new IllegalArgumentException("No BOMs specified");
        }
        this.include = include;
        // Arrays.asList is a live view of its argument; copy the array so that later
        // changes to the caller's array cannot alter what this stream detects.
        this.boms = Arrays.asList(boms.clone());
    }

    /**
     * Indicates whether the stream contains one of the specified BOMs.
     *
     * @return true if the stream has one of the specified BOMs, otherwise false
     * if it does not
     * @throws IOException if an error reading the first bytes of the stream occurs
     */
    public boolean hasBOM() throws IOException {
        return (getBOM() != null);
    }

    /**
     * Indicates whether the stream contains the specified BOM.
     *
     * @param bom The BOM to check for
     * @return true if the stream has the specified BOM, otherwise false
     * if it does not
     * @throws IllegalArgumentException if the BOM is not one the stream
     * is configured to detect
     * @throws IOException if an error reading the first bytes of the stream occurs
     */
    public boolean hasBOM(ByteOrderMark bom) throws IOException {
        if (!boms.contains(bom)) {
            throw new IllegalArgumentException("Stream not configure to detect " + bom);
        }
        // Run detection first: testing the field before calling getBOM() would report
        // "no BOM" whenever this is the first method invoked on the stream.
        ByteOrderMark detected = getBOM();
        return (detected != null && detected.equals(bom));
    }

    /**
     * Return the BOM (Byte Order Mark).
     * <p>
     * The first call reads bytes from the delegate, one at a time, until a
     * configured BOM matches, the end of the stream is reached, or as many bytes
     * as the longest configured BOM have been read. The bytes read are buffered
     * and replayed by the read/skip methods, except that a matched BOM is
     * dropped when this stream was configured to exclude it.
     * </p>
     *
     * @return The BOM or null if none
     * @throws IOException if an error reading the first bytes of the stream occurs
     */
    public ByteOrderMark getBOM() throws IOException {
        if (firstBytes == null) {
            // Detection may run again after reset() returned to a mark set at the very
            // start of the stream, so clear any state left over from the previous run.
            // A stale fbLength would stop any BOM from matching and could push the
            // replay index past the end of firstBytes.
            fbLength = 0;
            byteOrderMark = null;
            int max = 0;
            for (ByteOrderMark bom : boms) {
                max = Math.max(max, bom.length());
            }
            firstBytes = new int[max];
            for (int i = 0; i < firstBytes.length; i++) {
                firstBytes[i] = in.read();
                fbLength++;
                if (firstBytes[i] < 0) {
                    // End of stream: the -1 stays in the buffer and doubles as the
                    // "no more first bytes" signal in readFirstBytes().
                    break;
                }

                byteOrderMark = find();
                if (byteOrderMark != null) {
                    if (!include) {
                        // Everything buffered so far is the BOM itself: drop it.
                        fbLength = 0;
                    }
                    break;
                }
            }
        }
        return byteOrderMark;
    }

    /**
     * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}.
     *
     * @return The BOM charset Name or null if no BOM found
     * @throws IOException if an error reading the first bytes of the stream occurs
     *
     */
    public String getBOMCharsetName() throws IOException {
        getBOM();
        return (byteOrderMark == null ? null : byteOrderMark.getCharsetName());
    }

    /**
     * This method reads and either preserves or skips the first bytes in the
     * stream. It behaves like the single-byte <code>read()</code> method,
     * either returning a valid byte or -1 to indicate that the initial bytes
     * have been processed already.
     * @return the next buffered first byte, or -1 if there are no (more)
     * buffered first bytes to return
     * @throws IOException if an I/O error occurs
     */
    private int readFirstBytes() throws IOException {
        getBOM();
        return (fbIndex < fbLength) ? firstBytes[fbIndex++] : -1;
    }

    /**
     * Find a BOM with the specified bytes.
     *
     * @return The matched BOM or null if none matched
     */
    private ByteOrderMark find() {
        for (ByteOrderMark bom : boms) {
            if (matches(bom)) {
                return bom;
            }
        }
        return null;
    }

    /**
     * Check if the bytes match a BOM.
     * <p>
     * A BOM only matches when its length equals the number of bytes buffered
     * so far and every byte is equal.
     * </p>
     *
     * @param bom The BOM
     * @return true if the bytes match the bom, otherwise false
     */
    private boolean matches(ByteOrderMark bom) {
        if (bom.length() != fbLength) {
            return false;
        }
        for (int i = 0; i < bom.length(); i++) {
            if (bom.get(i) != firstBytes[i]) {
                return false;
            }
        }
        return true;
    }

    //----------------------------------------------------------------------------
    //  Implementation of InputStream
    //----------------------------------------------------------------------------

    /**
     * Invokes the delegate's <code>read()</code> method, detecting and
     * optionally skipping BOM.
     * @return the byte read (excluding BOM) or -1 if the end of stream
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        int b = readFirstBytes();
        return (b >= 0) ? b : in.read();
    }

    /**
     * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting
     * and optionally skipping BOM.
     * @param buf the buffer to read the bytes into
     * @param off The start offset
     * @param len The number of bytes to read (excluding BOM)
     * @return the number of bytes read or -1 if the end of stream
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int firstCount = 0;
        int b = 0;
        // Drain the buffered first bytes before touching the delegate.
        while ((len > 0) && (b >= 0)) {
            b = readFirstBytes();
            if (b >= 0) {
                buf[off++] = (byte) (b & 0xFF);
                len--;
                firstCount++;
            }
        }
        int secondCount = in.read(buf, off, len);
        if (secondCount < 0) {
            // Report end of stream only when nothing at all was delivered; returning 0
            // here would make callers that loop until -1 spin forever.
            return (firstCount > 0) ? firstCount : -1;
        }
        return firstCount + secondCount;
    }

    /**
     * Invokes the delegate's <code>read(byte[])</code> method, detecting and
     * optionally skipping BOM.
     * @param buf the buffer to read the bytes into
     * @return the number of bytes read (excluding BOM)
     * or -1 if the end of stream
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] buf) throws IOException {
        return read(buf, 0, buf.length);
    }

    /**
     * Invokes the delegate's <code>mark(int)</code> method, also remembering
     * the current position within the buffered first bytes and whether BOM
     * detection had not yet run.
     * @param readlimit read ahead limit
     */
    @Override
    public synchronized void mark(int readlimit) {
        markFbIndex = fbIndex;
        markedAtStart = (firstBytes == null);
        in.mark(readlimit);
    }

    /**
     * Invokes the delegate's <code>reset()</code> method, also restoring the
     * position within the buffered first bytes. If the mark was set before BOM
     * detection had run, the buffered bytes are discarded so that detection
     * runs again on the next access.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public synchronized void reset() throws IOException {
        fbIndex = markFbIndex;
        if (markedAtStart) {
            firstBytes = null;
        }

        in.reset();
    }

    /**
     * Invokes the delegate's <code>skip(long)</code> method, detecting
     * and optionally skipping BOM.
     * @param n the number of bytes to skip
     * @return the number of bytes actually skipped, including any buffered
     * first bytes that were skipped
     * @throws IOException if an I/O error occurs
     */
    @Override
    public long skip(long n) throws IOException {
        long skipped = 0;
        // Buffered first bytes count towards the request and towards the result.
        while ((n > skipped) && (readFirstBytes() >= 0)) {
            skipped++;
        }
        return in.skip(n - skipped) + skipped;
    }
}

Other Commons IO examples (source code examples)

Here is a short list of links related to this Commons IO BOMInputStream.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.