alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (MultivariateSummaryStatistics.java)

This example Java source code file (MultivariateSummaryStatistics.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

dimensionmismatchexception, geometricmean, mathillegalstateexception, max, multivariatesummarystatistics, override, realmatrix, statisticalmultivariatesummary, storelessunivariatestatistic, string, stringbuilder, sumoflogs, sumofsquares, util, vectorialcovariance

The MultivariateSummaryStatistics.java Java example source code

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.descriptive;

import java.io.Serializable;
import java.util.Arrays;

import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.MathIllegalStateException;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.stat.descriptive.moment.GeometricMean;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.VectorialCovariance;
import org.apache.commons.math3.stat.descriptive.rank.Max;
import org.apache.commons.math3.stat.descriptive.rank.Min;
import org.apache.commons.math3.stat.descriptive.summary.Sum;
import org.apache.commons.math3.stat.descriptive.summary.SumOfLogs;
import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares;
import org.apache.commons.math3.util.MathUtils;
import org.apache.commons.math3.util.MathArrays;
import org.apache.commons.math3.util.Precision;
import org.apache.commons.math3.util.FastMath;

/**
 * <p>Computes summary statistics for a stream of n-tuples added using the
 * {@link #addValue(double[]) addValue} method. The data values are not stored
 * in memory, so this class can be used to compute statistics for very large
 * n-tuple streams.</p>
 *
 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
 * summary state and compute statistics are configurable via setters.
 * For example, the default implementation for the mean can be overridden by
 * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
 * parameters to these methods must implement the
 * {@link StorelessUnivariateStatistic} interface and configuration must be
 * completed before <code>addValue is called. No configuration is
 * necessary to use the default, commons-math provided implementations.</p>
 *
 * <p>To compute statistics for a stream of n-tuples, construct a
 * MultivariateStatistics instance with dimension n and then use
 * {@link #addValue(double[])} to add n-tuples. The <code>getXxx
 * methods where Xxx is a statistic return an array of <code>double
 * values, where for <code>i = 0,...,n-1 the ith array element is the
 * value of the given statistic for data range consisting of the i<sup>th element of
 * each of the input n-tuples.  For example, if <code>addValue is called
 * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
 * <code>getSum will return a three-element array with values
 * {0+3+6, 1+4+7, 2+5+8}</p>
 *
 * <p>Note: This class is not thread-safe. Use
 * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
 * threads is required.</p>
 *
 * @since 1.2
 */
public class MultivariateSummaryStatistics
    implements StatisticalMultivariateSummary, Serializable {

    /** Serialization UID */
    private static final long serialVersionUID = 2271900808994826718L;

    /** Dimension of the data. */
    private int k;

    /** Count of values that have been added */
    private long n = 0;

    /** Sum statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] sumImpl;

    /** Sum of squares statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] sumSqImpl;

    /** Minimum statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] minImpl;

    /** Maximum statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] maxImpl;

    /** Sum of log statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] sumLogImpl;

    /** Geometric mean statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] geoMeanImpl;

    /** Mean statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic[] meanImpl;

    /** Covariance statistic implementation - cannot be reset. */
    private VectorialCovariance covarianceImpl;

    /**
     * Construct a MultivariateSummaryStatistics instance
     * @param k dimension of the data
     * @param isCovarianceBiasCorrected if true, the unbiased sample
     * covariance is computed, otherwise the biased population covariance
     * is computed
     */
    public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
        this.k = k;

        sumImpl     = new StorelessUnivariateStatistic[k];
        sumSqImpl   = new StorelessUnivariateStatistic[k];
        minImpl     = new StorelessUnivariateStatistic[k];
        maxImpl     = new StorelessUnivariateStatistic[k];
        sumLogImpl  = new StorelessUnivariateStatistic[k];
        geoMeanImpl = new StorelessUnivariateStatistic[k];
        meanImpl    = new StorelessUnivariateStatistic[k];

        for (int i = 0; i < k; ++i) {
            sumImpl[i]     = new Sum();
            sumSqImpl[i]   = new SumOfSquares();
            minImpl[i]     = new Min();
            maxImpl[i]     = new Max();
            sumLogImpl[i]  = new SumOfLogs();
            geoMeanImpl[i] = new GeometricMean();
            meanImpl[i]    = new Mean();
        }

        covarianceImpl =
            new VectorialCovariance(k, isCovarianceBiasCorrected);

    }

    /**
     * Add an n-tuple to the data
     *
     * @param value  the n-tuple to add
     * @throws DimensionMismatchException if the length of the array
     * does not match the one used at construction
     */
    public void addValue(double[] value) throws DimensionMismatchException {
        checkDimension(value.length);
        for (int i = 0; i < k; ++i) {
            double v = value[i];
            sumImpl[i].increment(v);
            sumSqImpl[i].increment(v);
            minImpl[i].increment(v);
            maxImpl[i].increment(v);
            sumLogImpl[i].increment(v);
            geoMeanImpl[i].increment(v);
            meanImpl[i].increment(v);
        }
        covarianceImpl.increment(value);
        n++;
    }

    /**
     * Returns the dimension of the data
     * @return The dimension of the data
     */
    public int getDimension() {
        return k;
    }

    /**
     * Returns the number of available values
     * @return The number of available values
     */
    public long getN() {
        return n;
    }

    /**
     * Returns an array of the results of a statistic.
     * @param stats univariate statistic array
     * @return results array
     */
    private double[] getResults(StorelessUnivariateStatistic[] stats) {
        double[] results = new double[stats.length];
        for (int i = 0; i < results.length; ++i) {
            results[i] = stats[i].getResult();
        }
        return results;
    }

    /**
     * Returns an array whose i<sup>th entry is the sum of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component sums
     */
    public double[] getSum() {
        return getResults(sumImpl);
    }

    /**
     * Returns an array whose i<sup>th entry is the sum of squares of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component sums of squares
     */
    public double[] getSumSq() {
        return getResults(sumSqImpl);
    }

    /**
     * Returns an array whose i<sup>th entry is the sum of logs of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component log sums
     */
    public double[] getSumLog() {
        return getResults(sumLogImpl);
    }

    /**
     * Returns an array whose i<sup>th entry is the mean of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component means
     */
    public double[] getMean() {
        return getResults(meanImpl);
    }

    /**
     * Returns an array whose i<sup>th entry is the standard deviation of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component standard deviations
     */
    public double[] getStandardDeviation() {
        double[] stdDev = new double[k];
        if (getN() < 1) {
            Arrays.fill(stdDev, Double.NaN);
        } else if (getN() < 2) {
            Arrays.fill(stdDev, 0.0);
        } else {
            RealMatrix matrix = covarianceImpl.getResult();
            for (int i = 0; i < k; ++i) {
                stdDev[i] = FastMath.sqrt(matrix.getEntry(i, i));
            }
        }
        return stdDev;
    }

    /**
     * Returns the covariance matrix of the values that have been added.
     *
     * @return the covariance matrix
     */
    public RealMatrix getCovariance() {
        return covarianceImpl.getResult();
    }

    /**
     * Returns an array whose i<sup>th entry is the maximum of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component maxima
     */
    public double[] getMax() {
        return getResults(maxImpl);
    }

    /**
     * Returns an array whose i<sup>th entry is the minimum of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component minima
     */
    public double[] getMin() {
        return getResults(minImpl);
    }

    /**
     * Returns an array whose i<sup>th entry is the geometric mean of the
     * i<sup>th entries of the arrays that have been added using
     * {@link #addValue(double[])}
     *
     * @return the array of component geometric means
     */
    public double[] getGeometricMean() {
        return getResults(geoMeanImpl);
    }

    /**
     * Generates a text report displaying
     * summary statistics from values that
     * have been added.
     * @return String with line feeds displaying statistics
     */
    @Override
    public String toString() {
        final String separator = ", ";
        final String suffix = System.getProperty("line.separator");
        StringBuilder outBuffer = new StringBuilder();
        outBuffer.append("MultivariateSummaryStatistics:" + suffix);
        outBuffer.append("n: " + getN() + suffix);
        append(outBuffer, getMin(), "min: ", separator, suffix);
        append(outBuffer, getMax(), "max: ", separator, suffix);
        append(outBuffer, getMean(), "mean: ", separator, suffix);
        append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
        append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
        append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
        append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
        outBuffer.append("covariance: " + getCovariance().toString() + suffix);
        return outBuffer.toString();
    }

    /**
     * Append a text representation of an array to a buffer.
     * @param buffer buffer to fill
     * @param data data array
     * @param prefix text prefix
     * @param separator elements separator
     * @param suffix text suffix
     */
    private void append(StringBuilder buffer, double[] data,
                        String prefix, String separator, String suffix) {
        buffer.append(prefix);
        for (int i = 0; i < data.length; ++i) {
            if (i > 0) {
                buffer.append(separator);
            }
            buffer.append(data[i]);
        }
        buffer.append(suffix);
    }

    /**
     * Resets all statistics and storage
     */
    public void clear() {
        this.n = 0;
        for (int i = 0; i < k; ++i) {
            minImpl[i].clear();
            maxImpl[i].clear();
            sumImpl[i].clear();
            sumLogImpl[i].clear();
            sumSqImpl[i].clear();
            geoMeanImpl[i].clear();
            meanImpl[i].clear();
        }
        covarianceImpl.clear();
    }

    /**
     * Returns true iff <code>object is a MultivariateSummaryStatistics
     * instance and all statistics have the same values as this.
     * @param object the object to test equality against.
     * @return true if object equals this
     */
    @Override
    public boolean equals(Object object) {
        if (object == this ) {
            return true;
        }
        if (object instanceof MultivariateSummaryStatistics == false) {
            return false;
        }
        MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
        return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
               MathArrays.equalsIncludingNaN(stat.getMax(),           getMax())           &&
               MathArrays.equalsIncludingNaN(stat.getMean(),          getMean())          &&
               MathArrays.equalsIncludingNaN(stat.getMin(),           getMin())           &&
               Precision.equalsIncludingNaN(stat.getN(),             getN())             &&
               MathArrays.equalsIncludingNaN(stat.getSum(),           getSum())           &&
               MathArrays.equalsIncludingNaN(stat.getSumSq(),         getSumSq())         &&
               MathArrays.equalsIncludingNaN(stat.getSumLog(),        getSumLog())        &&
               stat.getCovariance().equals( getCovariance());
    }

    /**
     * Returns hash code based on values of statistics
     *
     * @return hash code
     */
    @Override
    public int hashCode() {
        int result = 31 + MathUtils.hash(getGeometricMean());
        result = result * 31 + MathUtils.hash(getGeometricMean());
        result = result * 31 + MathUtils.hash(getMax());
        result = result * 31 + MathUtils.hash(getMean());
        result = result * 31 + MathUtils.hash(getMin());
        result = result * 31 + MathUtils.hash(getN());
        result = result * 31 + MathUtils.hash(getSum());
        result = result * 31 + MathUtils.hash(getSumSq());
        result = result * 31 + MathUtils.hash(getSumLog());
        result = result * 31 + getCovariance().hashCode();
        return result;
    }

    // Getters and setters for statistics implementations
    /**
     * Sets statistics implementations.
     * @param newImpl new implementations for statistics
     * @param oldImpl old implementations for statistics
     * @throws DimensionMismatchException if the array dimension
     * does not match the one used at construction
     * @throws MathIllegalStateException if data has already been added
     * (i.e. if n > 0)
     */
    private void setImpl(StorelessUnivariateStatistic[] newImpl,
                         StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
                         DimensionMismatchException {
        checkEmpty();
        checkDimension(newImpl.length);
        System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
    }

    /**
     * Returns the currently configured Sum implementation
     *
     * @return the StorelessUnivariateStatistic implementing the sum
     */
    public StorelessUnivariateStatistic[] getSumImpl() {
        return sumImpl.clone();
    }

    /**
     * <p>Sets the implementation for the Sum.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param sumImpl the StorelessUnivariateStatistic instance to use * for computing the Sum * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setSumImpl(StorelessUnivariateStatistic[] sumImpl) throws MathIllegalStateException, DimensionMismatchException { setImpl(sumImpl, this.sumImpl); } /** * Returns the currently configured sum of squares implementation * * @return the StorelessUnivariateStatistic implementing the sum of squares */ public StorelessUnivariateStatistic[] getSumsqImpl() { return sumSqImpl.clone(); } /** * <p>Sets the implementation for the sum of squares.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param sumsqImpl the StorelessUnivariateStatistic instance to use * for computing the sum of squares * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl) throws MathIllegalStateException, DimensionMismatchException { setImpl(sumsqImpl, this.sumSqImpl); } /** * Returns the currently configured minimum implementation * * @return the StorelessUnivariateStatistic implementing the minimum */ public StorelessUnivariateStatistic[] getMinImpl() { return minImpl.clone(); } /** * <p>Sets the implementation for the minimum.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param minImpl the StorelessUnivariateStatistic instance to use * for computing the minimum * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setMinImpl(StorelessUnivariateStatistic[] minImpl) throws MathIllegalStateException, DimensionMismatchException { setImpl(minImpl, this.minImpl); } /** * Returns the currently configured maximum implementation * * @return the StorelessUnivariateStatistic implementing the maximum */ public StorelessUnivariateStatistic[] getMaxImpl() { return maxImpl.clone(); } /** * <p>Sets the implementation for the maximum.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param maxImpl the StorelessUnivariateStatistic instance to use * for computing the maximum * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl) throws MathIllegalStateException, DimensionMismatchException{ setImpl(maxImpl, this.maxImpl); } /** * Returns the currently configured sum of logs implementation * * @return the StorelessUnivariateStatistic implementing the log sum */ public StorelessUnivariateStatistic[] getSumLogImpl() { return sumLogImpl.clone(); } /** * <p>Sets the implementation for the sum of logs.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param sumLogImpl the StorelessUnivariateStatistic instance to use * for computing the log sum * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl) throws MathIllegalStateException, DimensionMismatchException{ setImpl(sumLogImpl, this.sumLogImpl); } /** * Returns the currently configured geometric mean implementation * * @return the StorelessUnivariateStatistic implementing the geometric mean */ public StorelessUnivariateStatistic[] getGeoMeanImpl() { return geoMeanImpl.clone(); } /** * <p>Sets the implementation for the geometric mean.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param geoMeanImpl the StorelessUnivariateStatistic instance to use * for computing the geometric mean * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl) throws MathIllegalStateException, DimensionMismatchException { setImpl(geoMeanImpl, this.geoMeanImpl); } /** * Returns the currently configured mean implementation * * @return the StorelessUnivariateStatistic implementing the mean */ public StorelessUnivariateStatistic[] getMeanImpl() { return meanImpl.clone(); } /** * <p>Sets the implementation for the mean.

* <p>This method must be activated before any data has been added - i.e., * before {@link #addValue(double[]) addValue} has been used to add data; * otherwise an IllegalStateException will be thrown.</p> * * @param meanImpl the StorelessUnivariateStatistic instance to use * for computing the mean * @throws DimensionMismatchException if the array dimension * does not match the one used at construction * @throws MathIllegalStateException if data has already been added * (i.e if n > 0) */ public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl) throws MathIllegalStateException, DimensionMismatchException{ setImpl(meanImpl, this.meanImpl); } /** * Throws MathIllegalStateException if the statistic is not empty. * @throws MathIllegalStateException if n > 0. */ private void checkEmpty() throws MathIllegalStateException { if (n > 0) { throw new MathIllegalStateException( LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n); } } /** * Throws DimensionMismatchException if dimension != k. * @param dimension dimension to check * @throws DimensionMismatchException if dimension != k */ private void checkDimension(int dimension) throws DimensionMismatchException { if (dimension != k) { throw new DimensionMismatchException(dimension, k); } } }

Other Java examples (source code examples)

Here is a short list of links related to this Java MultivariateSummaryStatistics.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.