|
Java example source code file (CharTrie.java)
// The CharTrie.java Java example source code
/*
 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package sun.text.normalizer;

import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;

/**
 * Trie implementation which stores data in char, 16 bits.
 *
 * <p>Values are kept in a {@code char[]} data array that is addressed through
 * the index array inherited from {@link Trie}. For tries read from a stream
 * the index and the data share one array (see {@link #unserialize}).</p>
 *
 * @author synwee
 * @see com.ibm.icu.impl.Trie
 * @since release 2.1, Jan 01 2002
 */
// note that i need to handle the block calculations later, since chartrie
// in icu4c uses the same index array.
public class CharTrie extends Trie
{
    // public constructors ---------------------------------------------

    /**
     * <p>Creates a new Trie with the settings for the trie data.</p>
     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
     * trie.</p>
     *
     * @param inputStream file input stream to a ICU data file, containing
     *                    the trie
     * @param dataManipulate object which provides methods to parse the char
     *                       data
     * @throws IOException thrown when data reading fails
     * @throws IllegalArgumentException if {@code isCharTrie()} reports that
     *         the data read does not describe a char trie
     * @draft 2.1
     */
    public CharTrie(InputStream inputStream,
                    DataManipulate dataManipulate) throws IOException
    {
        super(inputStream, dataManipulate);

        if (!isCharTrie()) {
            throw new IllegalArgumentException(
                               "Data given does not belong to a char trie.");
        }
        m_friendAgent_ = new FriendAgent();
    }

    /**
     * Make a dummy CharTrie.
     * A dummy trie is an empty runtime trie, used when a real data trie cannot
     * be loaded.
     *
     * The trie always returns the initialValue,
     * or the leadUnitValue for lead surrogate code points.
     * The Latin-1 part is always set up to be linear.
     *
     * @param initialValue the initial value that is set for all code points
     * @param leadUnitValue the value for lead surrogate code _units_ that do not
     *                      have associated supplementary data
     * @param dataManipulate object which provides methods to parse the char data
     */
    public CharTrie(int initialValue, int leadUnitValue,
                    DataManipulate dataManipulate)
    {
        super(new char[BMP_INDEX_LENGTH + SURROGATE_BLOCK_COUNT],
              HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);

        /* calculate the actual size of the dummy trie data */

        /* max(Latin-1, block 0) */
        int latin1Length = INDEX_STAGE_1_SHIFT_ <= 8 ? 256 : DATA_BLOCK_LENGTH;
        int dataLength = latin1Length;
        if (leadUnitValue != initialValue) {
            // one extra data block holds the lead surrogate value
            dataLength += DATA_BLOCK_LENGTH;
        }
        m_data_ = new char[dataLength];
        m_dataLength_ = dataLength;

        m_initialValue_ = (char) initialValue;

        /* fill the index and data arrays */

        /* indexes are preset to 0 (block 0) */

        /* Latin-1 data */
        for (int i = 0; i < latin1Length; ++i) {
            m_data_[i] = (char) initialValue;
        }

        if (leadUnitValue != initialValue) {
            /* indexes for lead surrogate code units to the block after Latin-1 */
            char block = (char) (latin1Length >> INDEX_STAGE_2_SHIFT_);
            int limit = 0xdc00 >> INDEX_STAGE_1_SHIFT_;
            for (int i = 0xd800 >> INDEX_STAGE_1_SHIFT_; i < limit; ++i) {
                m_index_[i] = block;
            }

            /* data for lead surrogate code units */
            limit = latin1Length + DATA_BLOCK_LENGTH;
            for (int i = latin1Length; i < limit; ++i) {
                m_data_[i] = (char) leadUnitValue;
            }
        }

        m_friendAgent_ = new FriendAgent();
    }

    /**
     * Java friend implementation.
     * Hands out direct references to the internal arrays of the enclosing
     * trie; no copies are made.
     */
    public class FriendAgent
    {
        /**
         * Gives out the index array of the trie
         * @return index array of trie
         */
        public char[] getPrivateIndex()
        {
            return m_index_;
        }

        /**
         * Gives out the data array of the trie
         * @return data array of trie
         */
        public char[] getPrivateData()
        {
            return m_data_;
        }

        /**
         * Gives out the initial (default) value of the trie
         * @return initial value of the trie
         */
        public int getPrivateInitialValue()
        {
            return m_initialValue_;
        }
    }

    // public methods --------------------------------------------------

    /**
     * Java friend implementation
     * To store the index and data array into the argument.
     * @param friend java friend UCharacterProperty object to store the array
     */
    public void putIndexData(UCharacterProperty friend)
    {
        friend.setIndexData(m_friendAgent_);
    }

    /**
     * Gets the value associated with the codepoint.
     * If no value is associated with the codepoint, a default value will be
     * returned.
     * @param ch codepoint
     * @return data value stored for the codepoint, or the initial value when
     *         no offset can be determined for it
     * @draft 2.1
     */
    public final char getCodePointValue(int ch)
    {
        int offset;

        // fastpath for U+0000..U+D7FF
        if (0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            // copy of getRawOffset()
            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
                     + (ch & INDEX_STAGE_3_MASK_);
            return m_data_[offset];
        }

        // handle U+D800..U+10FFFF
        offset = getCodePointOffset(ch);

        // getCodePointOffset returns -1 if there is an error; in this case we
        // return the default value: m_initialValue_
        return (offset >= 0) ? m_data_[offset] : m_initialValue_;
    }

    /**
     * Gets the value to the data which this lead surrogate character points
     * to.
     * Returned data may contain folding offset information for the next
     * trailing surrogate character.
     * This method does not guarantee correct results for trail surrogates.
     * @param ch lead surrogate character
     * @return data value
     * @draft 2.1
     */
    public final char getLeadValue(char ch)
    {
        return m_data_[getLeadOffset(ch)];
    }

    /**
     * Get the value associated with a pair of surrogates.
     * @param lead a lead surrogate
     * @param trail a trail surrogate
     * @return data value for the surrogate pair, or the initial value when
     *         no positive data offset is found for the pair
     * @draft 2.1
     */
    public final char getSurrogateValue(char lead, char trail)
    {
        int offset = getSurrogateOffset(lead, trail);
        // NOTE(review): getSurrogateOffset signals failure with -1, and
        // getCodePointValue tests "offset >= 0"; this test excludes offset 0
        // as well. Left unchanged -- confirm whether 0 can be a valid offset.
        if (offset > 0) {
            return m_data_[offset];
        }
        return m_initialValue_;
    }

    /**
     * <p>Get a value from a folding offset (from the value of a lead surrogate)
     * and a trail surrogate.</p>
     * <p>If the folding offset derived from the lead value is not positive,
     * the initial value of the trie is returned.</p>
     * @param leadvalue value associated with the lead surrogate which contains
     *        the folding offset
     * @param trail trail surrogate
     * @return trie data value associated with the trail character
     * @throws NullPointerException if this trie has no DataManipulate object
     * @draft 2.1
     */
    public final char getTrailValue(int leadvalue, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }
        int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
        if (offset > 0) {
            return m_data_[getRawOffset(offset,
                                        (char) (trail & SURROGATE_MASK_))];
        }
        return m_initialValue_;
    }

    // protected methods -----------------------------------------------

    /**
     * <p>Parses the input stream and stores its trie content into a index and
     * data array</p>
     * @param inputStream data input stream containing trie data
     * @exception IOException thrown when data reading fails
     */
    protected final void unserialize(InputStream inputStream)
                                                throws IOException
    {
        DataInputStream input = new DataInputStream(inputStream);
        int indexDataLength = m_dataOffset_ + m_dataLength_;
        m_index_ = new char[indexDataLength];
        for (int i = 0; i < indexDataLength; i++) {
            m_index_[i] = input.readChar();
        }
        // index and data live in the same array; the data part starts at
        // m_dataOffset_, and its first entry is taken as the initial value
        m_data_ = m_index_;
        m_initialValue_ = m_data_[m_dataOffset_];
    }

    /**
     * Gets the offset to the data which the surrogate pair points to.
     * @param lead lead surrogate
     * @param trail trailing surrogate
     * @return offset to data, or -1 when the lead value carries no positive
     *         folding offset
     * @throws NullPointerException if this trie has no DataManipulate object
     * @draft 2.1
     */
    protected final int getSurrogateOffset(char lead, char trail)
    {
        if (m_dataManipulate_ == null) {
            throw new NullPointerException(
                             "The field DataManipulate in this Trie is null");
        }

        // get fold position for the next trail surrogate
        int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));

        // get the real data from the folded lead/trail units
        if (offset > 0) {
            return getRawOffset(offset, (char) (trail & SURROGATE_MASK_));
        }

        // return -1 if there is an error, in this case we return the default
        // value: m_initialValue_
        return -1;
    }

    /**
     * Gets the value at the argument index.
     * For use internally in TrieIterator.
     * @param index value at index will be retrieved
     * @return 32 bit value
     * @see com.ibm.icu.impl.TrieIterator
     * @draft 2.1
     */
    protected final int getValue(int index)
    {
        return m_data_[index];
    }

    /**
     * Gets the default initial value
     * @return 32 bit value
     * @draft 2.1
     */
    protected final int getInitialValue()
    {
        return m_initialValue_;
    }

    // private data members --------------------------------------------

    /**
     * Default value
     */
    private char m_initialValue_;
    /**
     * Array of char data
     */
    private char m_data_[];
    /**
     * Agent for friends
     */
    private FriendAgent m_friendAgent_;
}
// Other Java examples (source code examples)Here is a short list of links related to this Java CharTrie.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.