alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (PopulationCoding.java)

This example Java source code file (PopulationCoding.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

adaptivecoding, coding, codingmethod, fcode, fdef, ioexception, lcoded, lvaluescoded, populationcoding, tdef, tdefl, ucode, udef, util

The PopulationCoding.java Java example source code

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.java.util.jar.pack;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import static com.sun.java.util.jar.pack.Constants.*;

/**
 * Population-based coding.
 * See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
 * @author John Rose
 */
// This tactic alone reduces the final zipped rt.jar by about a percent.
class PopulationCoding implements CodingMethod {
    Histogram vHist;   // histogram of all values
    int[]     fValues; // list of favored values
    int       fVlen;   // inclusive max index
    long[]    symtab;  // int map of favored value -> token [1..#fValues]

    CodingMethod favoredCoding;
    CodingMethod tokenCoding;
    CodingMethod unfavoredCoding;

    int L = -1;  //preferred L value for tokenCoding

    public void setFavoredValues(int[] fValues, int fVlen) {
        // Note:  {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
        // This is because zero is an exceptional favored value index.
        assert(fValues[0] == 0);  // must be empty
        assert(this.fValues == null);  // do not do this twice
        this.fValues = fValues;
        this.fVlen   = fVlen;
        if (L >= 0) {
            setL(L);  // reassert
        }
    }
    public void setFavoredValues(int[] fValues) {
        int lfVlen = fValues.length-1;
        setFavoredValues(fValues, lfVlen);
    }
    public void setHistogram(Histogram vHist) {
        this.vHist = vHist;
    }
    public void setL(int L) {
        this.L = L;
        if (L >= 0 && fValues != null && tokenCoding == null) {
            tokenCoding = fitTokenCoding(fVlen, L);
            assert(tokenCoding != null);
        }
    }

    public static Coding fitTokenCoding(int fVlen, int L) {
        // Find the smallest B s.t. (B,H,0) covers fVlen.
        if (fVlen < 256)
            // H/L do not matter when B==1
            return BandStructure.BYTE1;
        Coding longest = BandStructure.UNSIGNED5.setL(L);
        if (!longest.canRepresentUnsigned(fVlen))
            return null;  // failure; L is too sharp and fVlen too large
        Coding tc = longest;
        for (Coding shorter = longest; ; ) {
            shorter = shorter.setB(shorter.B()-1);
            if (shorter.umax() < fVlen)
                break;
            tc = shorter;  // shorten it by reducing B
        }
        return tc;
    }

    public void setFavoredCoding(CodingMethod favoredCoding) {
        this.favoredCoding = favoredCoding;
    }
    public void setTokenCoding(CodingMethod tokenCoding) {
        this.tokenCoding = tokenCoding;
        this.L = -1;
        if (tokenCoding instanceof Coding && fValues != null) {
            Coding tc = (Coding) tokenCoding;
            if (tc == fitTokenCoding(fVlen, tc.L()))
                this.L = tc.L();
            // Otherwise, it's a non-default coding.
        }
    }
    public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
        this.unfavoredCoding = unfavoredCoding;
    }

    public int favoredValueMaxLength() {
        if (L == 0)
            return Integer.MAX_VALUE;
        else
            return BandStructure.UNSIGNED5.setL(L).umax();
    }

    public void resortFavoredValues() {
        Coding tc = (Coding) tokenCoding;
        // Make a local copy before reordering.
        fValues = BandStructure.realloc(fValues, 1+fVlen);
        // Resort favoredValues within each byte-size cadre.
        int fillp = 1;  // skip initial zero
        for (int n = 1; n <= tc.B(); n++) {
            int nmax = tc.byteMax(n);
            if (nmax > fVlen)
                nmax = fVlen;
            if (nmax < tc.byteMin(n))
                break;
            int low = fillp;
            int high = nmax+1;
            if (high == low)  continue;
            assert(high > low)
                : high+"!>"+low;
            assert(tc.getLength(low) == n)
                : n+" != len("+(low)+") == "+
                  tc.getLength(low);
            assert(tc.getLength(high-1) == n)
                : n+" != len("+(high-1)+") == "+
                  tc.getLength(high-1);
            int midTarget = low + (high-low)/2;
            int mid = low;
            // Divide the values into cadres, and sort within each.
            int prevCount = -1;
            int prevLimit = low;
            for (int i = low; i < high; i++) {
                int val = fValues[i];
                int count = vHist.getFrequency(val);
                if (prevCount != count) {
                    if (n == 1) {
                        // For the single-byte encoding, keep strict order
                        // among frequency groups.
                        Arrays.sort(fValues, prevLimit, i);
                    } else if (Math.abs(mid - midTarget) >
                               Math.abs(i   - midTarget)) {
                        // Find a single inflection point
                        // close to the middle of the byte-size cadre.
                        mid = i;
                    }
                    prevCount = count;
                    prevLimit = i;
                }
            }
            if (n == 1) {
                Arrays.sort(fValues, prevLimit, high);
            } else {
                // Sort up to the midpoint, if any.
                Arrays.sort(fValues, low, mid);
                Arrays.sort(fValues, mid, high);
            }
            assert(tc.getLength(low) == tc.getLength(mid));
            assert(tc.getLength(low) == tc.getLength(high-1));
            fillp = nmax+1;
        }
        assert(fillp == fValues.length);

        // Reset symtab.
        symtab = null;
    }

    public int getToken(int value) {
        if (symtab == null)
            symtab = makeSymtab();
        int pos = Arrays.binarySearch(symtab, (long)value << 32);
        if (pos < 0)  pos = -pos-1;
        if (pos < symtab.length && value == (int)(symtab[pos] >>> 32))
            return (int)symtab[pos];
        else
            return 0;
    }

    public int[][] encodeValues(int[] values, int start, int end) {
        // Compute token sequence.
        int[] tokens = new int[end-start];
        int nuv = 0;
        for (int i = 0; i < tokens.length; i++) {
            int val = values[start+i];
            int tok = getToken(val);
            if (tok != 0)
                tokens[i] = tok;
            else
                nuv += 1;
        }
        // Compute unfavored value sequence.
        int[] unfavoredValues = new int[nuv];
        nuv = 0;  // reset
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i] != 0)  continue;  // already covered
            int val = values[start+i];
            unfavoredValues[nuv++] = val;
        }
        assert(nuv == unfavoredValues.length);
        return new int[][]{ tokens, unfavoredValues };
    }

    private long[] makeSymtab() {
        long[] lsymtab = new long[fVlen];
        for (int token = 1; token <= fVlen; token++) {
            lsymtab[token-1] = ((long)fValues[token] << 32) | token;
        }
        // Index by value:
        Arrays.sort(lsymtab);
        return lsymtab;
    }

    private Coding getTailCoding(CodingMethod c) {
        while (c instanceof AdaptiveCoding)
            c = ((AdaptiveCoding)c).tailCoding;
        return (Coding) c;
    }

    // CodingMethod methods.
    public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
        int[][] vals = encodeValues(a, start, end);
        writeSequencesTo(out, vals[0], vals[1]);
    }
    void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
        favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
        getTailCoding(favoredCoding).writeTo(out, computeSentinelValue());
        tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
        if (uValues.length > 0)
            unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
    }

   int computeSentinelValue() {
        Coding fc = getTailCoding(favoredCoding);
        if (fc.isDelta()) {
            // repeat the last favored value, using delta=0
            return 0;
        } else {
            // else repeat the shorter of the min or last value
            int min = fValues[1];
            int last = min;
            // (remember that fVlen is an inclusive limit in fValues)
            for (int i = 2; i <= fVlen; i++) {
                last = fValues[i];
                min = moreCentral(min, last);
            }
            int endVal;
            if (fc.getLength(min) <= fc.getLength(last))
                return min;
            else
                return last;
        }
   }

    public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
        // Parameters are fCode, L, uCode.
        setFavoredValues(readFavoredValuesFrom(in, end-start));
        // Read the tokens.  Read them into the final array, for the moment.
        tokenCoding.readArrayFrom(in, a, start, end);
        // Decode the favored tokens.
        int headp = 0, tailp = -1;
        int uVlen = 0;
        for (int i = start; i < end; i++) {
            int tok = a[i];
            if (tok == 0) {
                // Make a linked list, and decode in a second pass.
                if (tailp < 0) {
                    headp = i;
                } else {
                    a[tailp] = i;
                }
                tailp = i;
                uVlen += 1;
            } else {
                a[i] = fValues[tok];
            }
        }
        // Walk the linked list of "zero" locations, decoding unfavored vals.
        int[] uValues = new int[uVlen];
        if (uVlen > 0)
            unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
        for (int i = 0; i < uVlen; i++) {
            int nextp = a[headp];
            a[headp] = uValues[i];
            headp = nextp;
        }
    }

    int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
        int[] lfValues = new int[1000];  // realloc as needed
        // The set uniqueValuesForDebug records all favored values.
        // As each new value is added, we assert that the value
        // was not already in the set.
        Set<Integer> uniqueValuesForDebug = null;
        assert((uniqueValuesForDebug = new HashSet<>()) != null);
        int fillp = 1;
        maxForDebug += fillp;
        int min = Integer.MIN_VALUE;  // farthest from the center
        //int min2 = Integer.MIN_VALUE;  // emulate buggy 150.7 spec.
        int last = 0;
        CodingMethod fcm = favoredCoding;
        while (fcm instanceof AdaptiveCoding) {
            AdaptiveCoding ac = (AdaptiveCoding) fcm;
            int len = ac.headLength;
            while (fillp + len > lfValues.length) {
                lfValues = BandStructure.realloc(lfValues);
            }
            int newFillp = fillp + len;
            ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp);
            while (fillp < newFillp) {
                int val = lfValues[fillp++];
                assert(uniqueValuesForDebug.add(val));
                assert(fillp <= maxForDebug);
                last = val;
                min = moreCentral(min, val);
                //min2 = moreCentral2(min2, val, min);
            }
            fcm = ac.tailCoding;
        }
        Coding fc = (Coding) fcm;
        if (fc.isDelta()) {
            for (long state = 0;;) {
                // Read a new value:
                state += fc.readFrom(in);
                int val;
                if (fc.isSubrange())
                    val = fc.reduceToUnsignedRange(state);
                else
                    val = (int)state;
                state = val;
                if (fillp > 1 && (val == last || val == min)) //|| val == min2
                    break;
                if (fillp == lfValues.length)
                    lfValues = BandStructure.realloc(lfValues);
                lfValues[fillp++] = val;
                assert(uniqueValuesForDebug.add(val));
                assert(fillp <= maxForDebug);
                last = val;
                min = moreCentral(min, val);
                //min2 = moreCentral(min2, val);
            }
        } else {
            for (;;) {
                int val = fc.readFrom(in);
                if (fillp > 1 && (val == last || val == min)) //|| val == min2
                    break;
                if (fillp == lfValues.length)
                    lfValues = BandStructure.realloc(lfValues);
                lfValues[fillp++] = val;
                assert(uniqueValuesForDebug.add(val));
                assert(fillp <= maxForDebug);
                last = val;
                min = moreCentral(min, val);
                //min2 = moreCentral2(min2, val, min);
            }
        }
        return BandStructure.realloc(lfValues, fillp);
    }

    private static int moreCentral(int x, int y) {
        int kx = (x >> 31) ^ (x << 1);
        int ky = (y >> 31) ^ (y << 1);
        // bias kx/ky to get an unsigned comparison:
        kx -= Integer.MIN_VALUE;
        ky -= Integer.MIN_VALUE;
        int xy = (kx < ky? x: y);
        // assert that this ALU-ish version is the same:
        assert(xy == moreCentralSlow(x, y));
        return xy;
    }
//  private static int moreCentral2(int x, int y, int min) {
//      // Strict implementation of buggy 150.7 specification.
//      // The bug is that the spec. says absolute-value ties are broken
//      // in favor of positive numbers, but the suggested implementation
//      // (also mentioned in the spec.) breaks ties in favor of negatives.
//      if (x + y == 0)  return (x > y? x : y);
//      return min;
//  }
    private static int moreCentralSlow(int x, int y) {
        int ax = x;
        if (ax < 0)  ax = -ax;
        if (ax < 0)  return y;  //x is MIN_VALUE
        int ay = y;
        if (ay < 0)  ay = -ay;
        if (ay < 0)  return x;  //y is MIN_VALUE
        if (ax < ay)  return x;
        if (ax > ay)  return y;
        // At this point the absolute values agree, and the negative wins.
        return x < y ? x : y;
    }

    static final int[] LValuesCoded
        = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };

    public byte[] getMetaCoding(Coding dflt) {
        int K = fVlen;
        int LCoded = 0;
        if (tokenCoding instanceof Coding) {
            Coding tc = (Coding) tokenCoding;
            if (tc.B() == 1) {
                LCoded = 1;
            } else if (L >= 0) {
                assert(L == tc.L());
                for (int i = 1; i < LValuesCoded.length; i++) {
                    if (LValuesCoded[i] == L) { LCoded = i; break; }
                }
            }
        }
        CodingMethod tokenDflt = null;
        if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
            // A simple L value is enough to recover the tokenCoding.
            tokenDflt = tokenCoding;
        }
        int FDef = (favoredCoding == dflt)?1:0;
        int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
        int TDef = (tokenCoding == tokenDflt)?1:0;
        int TDefL = (TDef == 1) ? LCoded : 0;
        assert(TDef == ((TDefL>0)?1:0));
        ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
        bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
        try {
            if (FDef == 0)  bytes.write(favoredCoding.getMetaCoding(dflt));
            if (TDef == 0)  bytes.write(tokenCoding.getMetaCoding(dflt));
            if (UDef == 0)  bytes.write(unfavoredCoding.getMetaCoding(dflt));
        } catch (IOException ee) {
            throw new RuntimeException(ee);
        }
        return bytes.toByteArray();
    }
    public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
        int op = bytes[pos++] & 0xFF;
        if (op < _meta_pop || op >= _meta_limit)  return pos-1; // backup
        op -= _meta_pop;
        int FDef = op % 2;
        int UDef = (op / 2) % 2;
        int TDefL = (op / 4);
        int TDef = (TDefL > 0)?1:0;
        int L = LValuesCoded[TDefL];
        CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt};
        if (FDef == 0)
            pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode);
        if (TDef == 0)
            pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode);
        if (UDef == 0)
            pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode);
        PopulationCoding pop = new PopulationCoding();
        pop.L = L;  // might be -1
        pop.favoredCoding   = FCode[0];
        pop.tokenCoding     = TCode[0];  // might be null!
        pop.unfavoredCoding = UCode[0];
        res[0] = pop;
        return pos;
    }

    private String keyString(CodingMethod m) {
        if (m instanceof Coding)
            return ((Coding)m).keyString();
        if (m == null)
            return "none";
        return m.toString();
    }
    public String toString() {
        PropMap p200 = Utils.currentPropMap();
        boolean verbose
            = (p200 != null &&
               p200.getBoolean(Utils.COM_PREFIX+"verbose.pop"));
        StringBuilder res = new StringBuilder(100);
        res.append("pop(").append("fVlen=").append(fVlen);
        if (verbose && fValues != null) {
            res.append(" fV=[");
            for (int i = 1; i <= fVlen; i++) {
                res.append(i==1?"":",").append(fValues[i]);
            }
            res.append(";").append(computeSentinelValue());
            res.append("]");
        }
        res.append(" fc=").append(keyString(favoredCoding));
        res.append(" tc=").append(keyString(tokenCoding));
        res.append(" uc=").append(keyString(unfavoredCoding));
        res.append(")");
        return res.toString();
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java PopulationCoding.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.