alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Commons Math example source code file (EmpiricalDistributionImpl.java)

This example Commons Math source code file (EmpiricalDistributionImpl.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Commons Math tags/keywords

arraydataadapter, bufferedreader, bufferedreader, dataadapter, dataadapter, dataadapterfactory, empiricaldistributionimpl, filereader, io, ioexception, ioexception, net, network, override, streamdataadapter, summarystatistics, summarystatistics, util

The Commons Math EmpiricalDistributionImpl.java source code

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math.random;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.stat.descriptive.StatisticalSummary;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;

/**
 * Implements <code>EmpiricalDistribution interface.  This implementation
 * uses what amounts to the
 * <a href="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
 * Variable Kernel Method</a> with Gaussian smoothing:

* <strong>Digesting the input file * <ol>

  • Pass the file once to compute min and max.
  • * <li>Divide the range from min-max into binCount "bins." * <li>Pass the data file again, computing bin counts and univariate * statistics (mean, std dev.) for each of the bins </li> * <li>Divide the interval (0,1) into subintervals associated with the bins, * with the length of a bin's subinterval proportional to its count.</li> * <strong>Generating random values from the distribution
      * <li>Generate a uniformly distributed value in (0,1) * <li>Select the subinterval to which the value belongs. * <li>Generate a random Gaussian value with mean = mean of the associated * bin and std dev = std dev of associated bin.</li>

    *<strong>USAGE NOTES:

      *<li>The binCount is set by default to 1000. A good rule of thumb * is to set the bin count to approximately the length of the input file divided * by 10. </li> *<li>The input file must be a plain text file containing one valid numeric * entry per line.</li> * </ul>

      * * @version $Revision: 925812 $ $Date: 2010-03-21 11:49:31 -0400 (Sun, 21 Mar 2010) $ */ public class EmpiricalDistributionImpl implements Serializable, EmpiricalDistribution { /** Serializable version identifier */ private static final long serialVersionUID = 5729073523949762654L; /** List of SummaryStatistics objects characterizing the bins */ private List<SummaryStatistics> binStats = null; /** Sample statistics */ private SummaryStatistics sampleStats = null; /** Max loaded value */ private double max = Double.NEGATIVE_INFINITY; /** Min loaded value */ private double min = Double.POSITIVE_INFINITY; /** Grid size */ private double delta = 0d; /** number of bins */ private int binCount = 1000; /** is the distribution loaded? */ private boolean loaded = false; /** upper bounds of subintervals in (0,1) "belonging" to the bins */ private double[] upperBounds = null; /** RandomData instance to use in repeated calls to getNext() */ private RandomData randomData = new RandomDataImpl(); /** * Creates a new EmpiricalDistribution with the default bin count. */ public EmpiricalDistributionImpl() { binStats = new ArrayList<SummaryStatistics>(); } /** * Creates a new EmpiricalDistribution with the specified bin count. * * @param binCount number of bins */ public EmpiricalDistributionImpl(int binCount) { this.binCount = binCount; binStats = new ArrayList<SummaryStatistics>(); } /** * Computes the empirical distribution from the provided * array of numbers. * * @param in the input data array */ public void load(double[] in) { DataAdapter da = new ArrayDataAdapter(in); try { da.computeStats(); fillBinStats(in); } catch (IOException e) { throw new MathRuntimeException(e); } loaded = true; } /** * Computes the empirical distribution using data read from a URL. * @param url url of the input file * * @throws IOException if an IO error occurs */ public void load(URL url) throws IOException { BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); try { DataAdapter da = new StreamDataAdapter(in); da.computeStats(); if (sampleStats.getN() == 0) { throw MathRuntimeException.createEOFException("URL {0} contains no data", url); } in = new BufferedReader(new InputStreamReader(url.openStream())); fillBinStats(in); loaded = true; } finally { try { in.close(); } catch (IOException ex) { // ignore } } } /** * Computes the empirical distribution from the input file. * * @param file the input file * @throws IOException if an IO error occurs */ public void load(File file) throws IOException { BufferedReader in = new BufferedReader(new FileReader(file)); try { DataAdapter da = new StreamDataAdapter(in); da.computeStats(); in = new BufferedReader(new FileReader(file)); fillBinStats(in); loaded = true; } finally { try { in.close(); } catch (IOException ex) { // ignore } } } /** * Provides methods for computing <code>sampleStats and * <code>beanStats abstracting the source of data. */ private abstract class DataAdapter{ /** * Compute bin stats. * * @throws IOException if an error occurs computing bin stats */ public abstract void computeBinStats() throws IOException; /** * Compute sample statistics. * * @throws IOException if an error occurs computing sample stats */ public abstract void computeStats() throws IOException; } /** * Factory of <code>DataAdapter objects. For every supported source * of data (array of doubles, file, etc.) an instance of the proper object * is returned. */ private class DataAdapterFactory{ /** * Creates a DataAdapter from a data object * * @param in object providing access to the data * @return DataAdapter instance */ public DataAdapter getAdapter(Object in) { if (in instanceof BufferedReader) { BufferedReader inputStream = (BufferedReader) in; return new StreamDataAdapter(inputStream); } else if (in instanceof double[]) { double[] inputArray = (double[]) in; return new ArrayDataAdapter(inputArray); } else { throw MathRuntimeException.createIllegalArgumentException( "input data comes from unsupported datasource: {0}, " + "supported sources: {1}, {2}", in.getClass().getName(), BufferedReader.class.getName(), double[].class.getName()); } } } /** * <code>DataAdapter for data provided through some input stream */ private class StreamDataAdapter extends DataAdapter{ /** Input stream providing access to the data */ private BufferedReader inputStream; /** * Create a StreamDataAdapter from a BufferedReader * * @param in BufferedReader input stream */ public StreamDataAdapter(BufferedReader in){ super(); inputStream = in; } /** {@inheritDoc} */ @Override public void computeBinStats() throws IOException { String str = null; double val = 0.0d; while ((str = inputStream.readLine()) != null) { val = Double.parseDouble(str); SummaryStatistics stats = binStats.get(findBin(val)); stats.addValue(val); } inputStream.close(); inputStream = null; } /** {@inheritDoc} */ @Override public void computeStats() throws IOException { String str = null; double val = 0.0; sampleStats = new SummaryStatistics(); while ((str = inputStream.readLine()) != null) { val = Double.valueOf(str).doubleValue(); sampleStats.addValue(val); } inputStream.close(); inputStream = null; } } /** * <code>DataAdapter for data provided as array of doubles. */ private class ArrayDataAdapter extends DataAdapter { /** Array of input data values */ private double[] inputArray; /** * Construct an ArrayDataAdapter from a double[] array * * @param in double[] array holding the data */ public ArrayDataAdapter(double[] in){ super(); inputArray = in; } /** {@inheritDoc} */ @Override public void computeStats() throws IOException { sampleStats = new SummaryStatistics(); for (int i = 0; i < inputArray.length; i++) { sampleStats.addValue(inputArray[i]); } } /** {@inheritDoc} */ @Override public void computeBinStats() throws IOException { for (int i = 0; i < inputArray.length; i++) { SummaryStatistics stats = binStats.get(findBin(inputArray[i])); stats.addValue(inputArray[i]); } } } /** * Fills binStats array (second pass through data file). * * @param in object providing access to the data * @throws IOException if an IO error occurs */ private void fillBinStats(Object in) throws IOException { // Set up grid min = sampleStats.getMin(); max = sampleStats.getMax(); delta = (max - min)/(Double.valueOf(binCount)).doubleValue(); // Initialize binStats ArrayList if (!binStats.isEmpty()) { binStats.clear(); } for (int i = 0; i < binCount; i++) { SummaryStatistics stats = new SummaryStatistics(); binStats.add(i,stats); } // Filling data in binStats Array DataAdapterFactory aFactory = new DataAdapterFactory(); DataAdapter da = aFactory.getAdapter(in); da.computeBinStats(); // Assign upperBounds based on bin counts upperBounds = new double[binCount]; upperBounds[0] = ((double) binStats.get(0).getN()) / (double) sampleStats.getN(); for (int i = 1; i < binCount-1; i++) { upperBounds[i] = upperBounds[i-1] + ((double) binStats.get(i).getN()) / (double) sampleStats.getN(); } upperBounds[binCount-1] = 1.0d; } /** * Returns the index of the bin to which the given value belongs * * @param value the value whose bin we are trying to find * @return the index of the bin containing the value */ private int findBin(double value) { return Math.min( Math.max((int) Math.ceil((value- min) / delta) - 1, 0), binCount - 1); } /** * Generates a random value from this distribution. * * @return the random value. * @throws IllegalStateException if the distribution has not been loaded */ public double getNextValue() throws IllegalStateException { if (!loaded) { throw MathRuntimeException.createIllegalStateException("distribution not loaded"); } // Start with a uniformly distributed random number in (0,1) double x = Math.random(); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return randomData.nextGaussian (stats.getMean(),stats.getStandardDeviation()); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathRuntimeException("no bin selected"); } /** * Returns a {@link StatisticalSummary} describing this distribution. * <strong>Preconditions:
        * <li>the distribution must be loaded before invoking this method
      * * @return the sample statistics * @throws IllegalStateException if the distribution has not been loaded */ public StatisticalSummary getSampleStats() { return sampleStats; } /** * Returns the number of bins. * * @return the number of bins. */ public int getBinCount() { return binCount; } /** * Returns a List of {@link SummaryStatistics} instances containing * statistics describing the values in each of the bins. The list is * indexed on the bin number. * * @return List of bin statistics. */ public List<SummaryStatistics> getBinStats() { return binStats; } /** * <p>Returns a fresh copy of the array of upper bounds for the bins. * Bins are: <br/> * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],..., * (upperBounds[binCount-2], upperBounds[binCount-1] = max].</p> * * <p>Note: In versions 1.0-2.0 of commons-math, this method * incorrectly returned the array of probability generator upper * bounds now returned by {@link #getGeneratorUpperBounds()}.</p> * * @return array of bin upper bounds * @since 2.1 */ public double[] getUpperBounds() { double[] binUpperBounds = new double[binCount]; binUpperBounds[0] = min + delta; for (int i = 1; i < binCount - 1; i++) { binUpperBounds[i] = binUpperBounds[i-1] + delta; } binUpperBounds[binCount - 1] = max; return binUpperBounds; } /** * <p>Returns a fresh copy of the array of upper bounds of the subintervals * of [0,1] used in generating data from the empirical distribution. * Subintervals correspond to bins with lengths proportional to bin counts.</p> * * <p>In versions 1.0-2.0 of commons-math, this array was (incorrectly) returned * by {@link #getUpperBounds()}.</p> * * @since 2.1 * @return array of upper bounds of subintervals used in data generation */ public double[] getGeneratorUpperBounds() { int len = upperBounds.length; double[] out = new double[len]; System.arraycopy(upperBounds, 0, out, 0, len); return out; } /** * Property indicating whether or not the distribution has been loaded. * * @return true if the distribution has been loaded */ public boolean isLoaded() { return loaded; } }

    Other Commons Math examples (source code examples)

    Here is a short list of links related to this Commons Math EmpiricalDistributionImpl.java source code file:

    ... this post is sponsored by my books ...

    #1 New Release!

    FP Best Seller

     

    new blog posts

     

    Copyright 1998-2021 Alvin Alexander, alvinalexander.com
    All Rights Reserved.

    A percentage of advertising revenue from
    pages under the /java/jwarehouse URI on this website is
    paid back to open source projects.