alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (CountCumSum.java)

This example Java source code file (CountCumSum.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

accumulator, broadcast, call, countcumsum, counter, cumulative, foldbetweenpartitionfunction, foldwithinpartitionfunction, illegalaccesserror, javardd, javasparkcontext, mapperpartitionvoidfunction, maxperpartitionaccumulator, suppresswarnings

The CountCumSum.java Java example source code

package org.deeplearning4j.spark.text.functions;

import org.apache.spark.Accumulator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.deeplearning4j.berkeley.Counter;
import org.deeplearning4j.spark.text.accumulators.MaxPerPartitionAccumulator;

import java.util.concurrent.atomic.AtomicLong;

/**
 * @author jeffreytang
 */
@SuppressWarnings("unchecked")
public class CountCumSum {

    // Starting variables
    private JavaSparkContext sc;
    private JavaRDD<AtomicLong> sentenceCountRDD;

    // Variables to fill in as we go
    private JavaRDD<AtomicLong> foldWithinPartitionRDD;
    private Broadcast<Counter broadcastedMaxPerPartitionCounter;
    private JavaRDD<Long> cumSumRDD;

    // Constructor
    public CountCumSum(JavaRDD<AtomicLong> sentenceCountRDD) {
        this.sentenceCountRDD = sentenceCountRDD;
        this.sc = new JavaSparkContext(sentenceCountRDD.context());
    }

    // Getter
    public JavaRDD<Long> getCumSumRDD() {
        if (cumSumRDD != null) {
            return cumSumRDD;
        } else {
            throw new IllegalAccessError("Cumulative Sum list not defined. Call buildCumSum() first.");
        }
    }

    // For each equivalent for partitions
    public void actionForMapPartition(JavaRDD rdd) {
        // Action to fill the accumulator
        rdd.foreachPartition(new MapPerPartitionVoidFunction());
    }
    // Do cum sum within the partition
    public void cumSumWithinPartition() {

        // Accumulator to get the max of the cumulative sum in each partition
        final Accumulator<Counter maxPerPartitionAcc = sc.accumulator(new Counter(),
                                                                                new MaxPerPartitionAccumulator());
        // Partition mapping to fold within partition
        foldWithinPartitionRDD = sentenceCountRDD.mapPartitionsWithIndex(
                new FoldWithinPartitionFunction(maxPerPartitionAcc), true).cache();
        actionForMapPartition(foldWithinPartitionRDD);

        // Broadcast the counter (partition index : sum of count) to all workers
        broadcastedMaxPerPartitionCounter = sc.broadcast(maxPerPartitionAcc.value());
    }

    public void cumSumBetweenPartition() {

        cumSumRDD = foldWithinPartitionRDD.mapPartitionsWithIndex(
                new FoldBetweenPartitionFunction(broadcastedMaxPerPartitionCounter), true)
                                          .setName("cumSumRDD").cache();
        foldWithinPartitionRDD.unpersist();
    }

    public JavaRDD<Long> buildCumSum() {
        cumSumWithinPartition();
        cumSumBetweenPartition();
        return getCumSumRDD();
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java CountCumSum.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.