alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (COMPOUND_TEXT_Encoder.java)

This example Java source code file (COMPOUND_TEXT_Encoder.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

bytearrayoutputstream, bytebuffer, charbuffer, charsetencoder, coderresult, compound_text_encoder, compoundtextsupport, illegalargumentexception, internalerror, iso8859_1, map, nio, string, unknown, unsupportedoperationexception, util

The COMPOUND_TEXT_Encoder.java Java example source code

/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package sun.nio.cs.ext;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;

import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Encoder for the COMPOUND_TEXT charset.
 *
 * <p>Each input character is encoded by delegating to a
 * {@link CharsetEncoder} for one of the encodings listed by
 * {@code CompoundTextSupport.getEncodings()}. Whenever the delegate
 * changes, the pending output of the previous delegate is flushed and the
 * escape sequence returned by {@code CompoundTextSupport.getEscapeSequence}
 * for the new encoding is emitted.
 *
 * <p>Escape sequences whose second and third bytes are {@code 0x25 0x2F}
 * are handled as "non-standard" charsets: their bytes are collected in an
 * intermediate buffer and written out as one or more length-prefixed
 * segments (see {@link #flushNonStandardCharsetBuffer(ByteBuffer)}).
 *
 * <p>Instances hold mutable per-encoding state (the current delegate and
 * the intermediate buffers).
 */
public class COMPOUND_TEXT_Encoder extends CharsetEncoder {

    /**
     * NOTE: The following four static variables should be used *only* for
     * testing whether an encoder can encode a specific character. They
     * cannot be used for actual encoding because they are shared across all
     * COMPOUND_TEXT encoders and may be stateful.
     */
    private static final Map<String,CharsetEncoder> encodingToEncoderMap =
      Collections.synchronizedMap(new HashMap<String,CharsetEncoder>(21, 1.0f));
    private static final CharsetEncoder latin1Encoder;
    private static final CharsetEncoder defaultEncoder;
    private static final boolean defaultEncodingSupported;

    static {
        CharsetEncoder encoder = Charset.defaultCharset().newEncoder();
        String encoding = encoder.charset().name();
        if ("ISO8859_1".equals(encoding)) {
            latin1Encoder = encoder;
            defaultEncoder = encoder;
            defaultEncodingSupported = true;
        } else {
            try {
                latin1Encoder =
                    Charset.forName("ISO8859_1").newEncoder();
            } catch (IllegalArgumentException e) {
                throw new ExceptionInInitializerError
                    ("ISO8859_1 unsupported");
            }
            defaultEncoder = encoder;
            // The platform default is only usable if CompoundTextSupport
            // lists it among its encodings.
            defaultEncodingSupported = CompoundTextSupport.getEncodings().
                contains(defaultEncoder.charset().name());
        }
    }

    /** Delegate encoder currently selected for output. */
    private CharsetEncoder encoder;
    /** One-character scratch array; {@link #charbuf} is a view of it. */
    private char[] charBuf = new char[1];
    private CharBuffer charbuf = CharBuffer.wrap(charBuf);
    /**
     * Non-null while the current delegate is a non-standard charset; holds
     * the header written by {@link #initNonStandardCharsetBuffer} followed
     * by the bytes encoded so far.
     */
    private ByteArrayOutputStream nonStandardCharsetBuffer;
    /** Per-character scratch output used while buffering non-standard data. */
    private byte[] byteBuf;
    private ByteBuffer bytebuf;
    /**
     * Number of characters buffered since the last flush, and the length of
     * the encoding name plus its one-byte divider.
     */
    private int numNonStandardChars, nonStandardEncodingLen;

    /**
     * Creates an encoder for the given charset, starting with an ISO8859_1
     * delegate. Both the average and the maximum bytes-per-char passed to
     * the superclass are {@code MAX_CONTROL_SEQUENCE_LEN + 2}.
     *
     * @param cs the charset this encoder belongs to
     */
    public COMPOUND_TEXT_Encoder(Charset cs) {
        super(cs,
              (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
              (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));
        try {
            encoder = Charset.forName("ISO8859_1").newEncoder();
        } catch (IllegalArgumentException cannotHappen) {}
        initEncoder(encoder);
    }

    /**
     * Encodes characters from {@code src} into {@code des} one at a time,
     * switching the delegate encoder (and emitting the matching escape
     * sequence) whenever the current delegate cannot encode a character.
     *
     * <p>The source is read through absolute {@code get(int)} calls, so it
     * does not need to be backed by an accessible array. On every exit path
     * the source position is set to the first character that was not
     * consumed.
     *
     * @param src the characters to encode
     * @param des the buffer receiving the encoded bytes
     * @return the result of the last delegate operation, an unmappable
     *         result of length 1, or {@link CoderResult#OVERFLOW} when an
     *         escape sequence or a non-standard segment does not fit
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
        CoderResult cr = CoderResult.UNDERFLOW;
        // Work in buffer coordinates rather than on src.array(), which
        // throws for read-only or non-array-backed buffers.
        int inOff = src.position();
        int inEnd = src.limit();

        try {
            while (inOff < inEnd && cr.isUnderflow()) {
                charBuf[0] = src.get(inOff);
                if (charBuf[0] <= '\u0008' ||
                    (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') ||
                    (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
                    // The compound text specification only permits the octets
                    // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
                    // 9B must also be removed because they initiate control
                    // sequences.
                    charBuf[0] = '?';
                }

                CharsetEncoder enc = getEncoder(charBuf[0]);
                if (enc == null) {
                    // NOTE(review): '?' is substituted when the action is
                    // REPORT and the character is reported otherwise, which
                    // looks inverted relative to the action names. This
                    // branch also selects the shared static latin1Encoder
                    // for real encoding. Behavior kept as-is; confirm the
                    // intent before changing it.
                    if (unmappableCharacterAction()
                        == CodingErrorAction.REPORT) {
                        charBuf[0] = '?';
                        enc = latin1Encoder;
                    } else {
                        return CoderResult.unmappableForLength(1);
                    }
                }
                if (enc != encoder) {
                    // Switching delegates: drain whatever the old one holds.
                    if (nonStandardCharsetBuffer != null) {
                        cr = flushNonStandardCharsetBuffer(des);
                    } else {
                        // The result is intentionally not assigned to cr.
                        flushEncoder(encoder, des);
                    }
                    if (!cr.isUnderflow())
                        return cr;
                    byte[] escSequence = CompoundTextSupport.
                        getEscapeSequence(enc.charset().name());
                    if (escSequence == null) {
                        throw new InternalError("Unknown encoding: " +
                                                enc.charset().name());
                    } else if (escSequence[1] == (byte)0x25 &&
                               escSequence[2] == (byte)0x2F) {
                        // Non-standard charset: output is buffered so the
                        // length bytes can be filled in at flush time.
                        initNonStandardCharsetBuffer(enc, escSequence);
                    } else if (des.remaining() >= escSequence.length) {
                        des.put(escSequence, 0, escSequence.length);
                    } else {
                        return CoderResult.OVERFLOW;
                    }
                    encoder = enc;
                    // Re-process the same character with the new delegate.
                    continue;
                }
                charbuf.rewind();
                if (nonStandardCharsetBuffer == null) {
                    cr = encoder.encode(charbuf, des, false);
                } else {
                    bytebuf.clear();
                    cr = encoder.encode(charbuf, bytebuf, false);
                    bytebuf.flip();
                    nonStandardCharsetBuffer.write(byteBuf,
                                                   0, bytebuf.limit());
                    numNonStandardChars++;
                }
                inOff++;
            }
            return cr;
        } finally {
            src.position(inOff);
        }
    }

    /**
     * Flushes the current delegate (or the buffered non-standard segment)
     * into {@code out} and then resets this encoder.
     *
     * @param out the buffer receiving the remaining bytes
     * @return the result of the flush
     */
    @Override
    protected CoderResult implFlush(ByteBuffer out) {
        CoderResult cr = (nonStandardCharsetBuffer != null)
            ? flushNonStandardCharsetBuffer(out)
            : flushEncoder(encoder, out);
        reset();
        return cr;
    }

    /**
     * Starts buffering output for a non-standard charset. The buffer is
     * primed with the escape sequence, two placeholder bytes for the M and
     * L length bytes, the encoding name from
     * {@code CompoundTextSupport.getEncoding}, and a {@code 0x02} divider.
     *
     * @param c the delegate encoder for the non-standard charset
     * @param escSequence the escape sequence introducing that charset
     * @throws InternalError if no encoding name is known for {@code c}
     */
    private void initNonStandardCharsetBuffer(CharsetEncoder c,
                                              byte[] escSequence)
    {
        nonStandardCharsetBuffer = new ByteArrayOutputStream();
        byteBuf = new byte[(int)c.maxBytesPerChar()];
        bytebuf = ByteBuffer.wrap(byteBuf);
        nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length);
        nonStandardCharsetBuffer.write(0); // M placeholder
        nonStandardCharsetBuffer.write(0); // L placeholder
        byte[] encoding = CompoundTextSupport.
            getEncoding(c.charset().name());
        if (encoding == null) {
            // Report the charset that was actually looked up, not the
            // previously active delegate.
            throw new InternalError
                ("Unknown encoding: " + c.charset().name());
        }
        nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
        nonStandardCharsetBuffer.write(0x02); // divider
        nonStandardEncodingLen = encoding.length + 1;
    }

    /**
     * Writes the buffered non-standard charset data to {@code out} as one
     * or more segments, each consisting of the bytes
     * {@code 0x1B 0x25 0x2F}, the fourth byte of the buffered escape
     * sequence, the M and L length bytes, the encoding name with its
     * divider, and a slice of the data. The fixed offsets 3 and 6 used here
     * imply that the buffered escape sequence is four bytes long.
     *
     * @param out the buffer receiving the segments
     * @return {@link CoderResult#OVERFLOW} if {@code out} is too small (in
     *         which case nothing is written and the buffer is retained),
     *         otherwise {@link CoderResult#UNDERFLOW}
     */
    private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
        if (numNonStandardChars > 0) {
            // Drain any bytes the delegate still holds into the buffer.
            byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() *
                                       numNonStandardChars];
            ByteBuffer bb = ByteBuffer.wrap(flushBuf);
            flushEncoder(encoder, bb);
            bb.flip();
            nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
            numNonStandardChars = 0;
        }

        int numBytes = nonStandardCharsetBuffer.size();
        // Header = escape sequence and M/L bytes (6) + name and divider.
        int nonStandardBytesOff = 6 + nonStandardEncodingLen;

        // Required space: the data bytes plus one header per segment.
        if (out.remaining() < (numBytes - nonStandardBytesOff) +
            nonStandardBytesOff * (((numBytes - nonStandardBytesOff) /
                                    ((1 << 14) - 1)) + 1))
        {
            return CoderResult.OVERFLOW;
        }

        byte[] nonStandardBytes =
            nonStandardCharsetBuffer.toByteArray();

        // The non-standard charset header only supports 2^14-1 bytes of data.
        // If we have more than that, we have to repeat the header.
        do {
            out.put((byte)0x1B);
            out.put((byte)0x25);
            out.put((byte)0x2F);
            out.put(nonStandardBytes[3]);

            int toWrite = Math.min(numBytes - nonStandardBytesOff,
                                   (1 << 14) - 1 - nonStandardEncodingLen);

            // The length covers the name, divider and data, and is split
            // into two 7-bit groups, each with the high bit set.
            out.put((byte)
                (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M
            out.put((byte)
                (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L
            out.put(nonStandardBytes, 6, nonStandardEncodingLen);
            out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
            nonStandardBytesOff += toWrite;
        } while (nonStandardBytesOff < numBytes);

        nonStandardCharsetBuffer = null;
        byteBuf = null;
        nonStandardEncodingLen = 0;
        return CoderResult.UNDERFLOW;
    }

    /**
     * Resets the encoder.
     * Call this method to reset the encoder to its initial state: the
     * non-standard buffers are dropped and a fresh ISO8859_1 delegate is
     * installed.
     */
    @Override
    protected void implReset() {
        numNonStandardChars = nonStandardEncodingLen = 0;
        nonStandardCharsetBuffer = null;
        byteBuf = null;
        try {
            encoder = Charset.forName("ISO8859_1").newEncoder();
        } catch (IllegalArgumentException cannotHappen) {
        }
        initEncoder(encoder);
    }

    /**
     * Return whether a character is mappable or not
     *
     * @param ch the character to test
     * @return true if a character is mappable, i.e. if
     *         {@link #getEncoder(char)} finds a delegate for it
     */
    @Override
    public boolean canEncode(char ch) {
        return getEncoder(ch) != null;
    }

    /**
     * Forwards a change of the malformed-input action to the current
     * delegate encoder.
     *
     * @param newAction the new malformed-input action
     */
    @Override
    protected void implOnMalformedInput(CodingErrorAction newAction) {
        // Must set the delegate's malformed-input action; forwarding to
        // onUnmappableCharacter would clobber its unmappable action instead.
        encoder.onMalformedInput(newAction);
    }

    /**
     * Forwards a change of the unmappable-character action to the current
     * delegate encoder.
     *
     * @param newAction the new unmappable-character action
     */
    @Override
    protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
        encoder.onUnmappableCharacter(newAction);
    }

    /**
     * Forwards a new replacement to the current delegate encoder, if one
     * has been created yet.
     *
     * @param newReplacement the new replacement bytes
     */
    @Override
    protected void implReplaceWith(byte[] newReplacement) {
        // The delegate is still null when this is reached before the
        // constructor body has assigned it.
        if (encoder != null)
            encoder.replaceWith(newReplacement);
    }

    /**
     * Try to figure out which CharsetEncoder to use for conversion
     * of the specified Unicode character. The target character encoding
     * of the returned encoder is approved to be used with Compound Text.
     *
     * <p>The shared static encoders are only consulted through
     * {@code canEncode}; except for the current delegate, the encoder
     * returned is always a newly created one.
     *
     * @param ch Unicode character
     * @return CharsetEncoder to convert the given character, or
     *         {@code null} if no supported encoding can encode it
     */
    private CharsetEncoder getEncoder(char ch) {
        // 1. Try the current encoder.
        if (encoder.canEncode(ch)) {
            return encoder;
        }

        // 2. Try the default encoder.
        if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
            CharsetEncoder retval = null;
            try {
                retval = defaultEncoder.charset().newEncoder();
            } catch (UnsupportedOperationException cannotHappen) {
            }
            initEncoder(retval);
            return retval;
        }

        // 3. Try ISO8859-1.
        if (latin1Encoder.canEncode(ch)) {
            CharsetEncoder retval = null;
            try {
                retval = latin1Encoder.charset().newEncoder();
            } catch (UnsupportedOperationException cannotHappen) {}
            initEncoder(retval);
            return retval;
        }

        // 4. Brute force search of all supported encodings.
        for (String encoding : CompoundTextSupport.getEncodings())
        {
            // Cached encoders are used for the canEncode probe only.
            CharsetEncoder enc = encodingToEncoderMap.get(encoding);
            if (enc == null) {
                enc = CompoundTextSupport.getEncoder(encoding);
                if (enc == null) {
                    throw new InternalError("Unsupported encoding: " +
                                            encoding);
                }
                encodingToEncoderMap.put(encoding, enc);
            }
            if (enc.canEncode(ch)) {
                CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding);
                initEncoder(retval);
                return retval;
            }
        }

        return null;
    }

    /**
     * Configures a delegate encoder to replace unmappable characters using
     * this encoder's current replacement bytes. An
     * {@link IllegalArgumentException} from the delegate is ignored.
     *
     * @param enc the delegate encoder to configure
     */
    private void initEncoder(CharsetEncoder enc) {
        try {
            enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith(replacement());
        } catch (IllegalArgumentException x) {}
    }

    /** Empty input used to signal end-of-input to a delegate encoder. */
    private CharBuffer fcb= CharBuffer.allocate(0);

    /**
     * Ends a delegate's encoding operation by encoding an empty buffer
     * with end-of-input set, then flushes it.
     *
     * @param enc the delegate encoder to finish
     * @param bb the buffer receiving any remaining bytes
     * @return the result of {@code enc.flush(bb)}
     */
    private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
        enc.encode(fcb, bb, true);
        return enc.flush(bb);
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java COMPOUND_TEXT_Encoder.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.