Java - Java tags/keywords

arraylist, dataset, exception, flatmapfunction, iterable, javardd, list, minibatchfunction, override, rddminibatches, serializable, util

The RDDMiniBatches.java Java example source code

/*
 *  * Copyright 2015 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 */

package org.deeplearning4j.spark.canova;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.nd4j.linalg.dataset.DataSet;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * RDD mini batch partitioning
 * @author Adam Gibson
 */
public class RDDMiniBatches  implements Serializable {
    private int miniBatches = 10;
    private JavaRDD<DataSet> toSplitJava;

    public RDDMiniBatches(int miniBatches, JavaRDD<DataSet> toSplit) {
        this.miniBatches = miniBatches;
        this.toSplitJava = toSplit;

    public JavaRDD<DataSet> miniBatchesJava() {
        //need a new mapping function, doesn't handle mini batches properly
        return toSplitJava.mapPartitions(new MiniBatchFunction(miniBatches));

    public static class MiniBatchFunction implements FlatMapFunction<Iterator {
        private int batchSize = 10;

        public MiniBatchFunction(int batchSize) {
            this.batchSize = batchSize;

        public Iterable<DataSet> call(Iterator dataSetIterator) throws Exception {
            List<DataSet> ret = new ArrayList<>();
            List<DataSet> temp = new ArrayList<>();
            while (dataSetIterator.hasNext()) {
                if (temp.size() == batchSize) {

            //edge cases with map partitions where one will be left over.
            //this is due to race conditions.
            if(temp.size() > 1)

            return ret;



