
Java example source code file (TestDecayPolicies.java)

This example Java source code file (TestDecayPolicies.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java tags/keywords

defaultgradient, double, gradient, hashmap, indarray, map, multilayerconfiguration, multilayernetwork, neuralnetconfiguration, optimizationalgorithm, string, test, updater, util

The TestDecayPolicies.java example source code

package org.deeplearning4j.nn.updater;

import org.apache.commons.math3.util.FastMath;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.conf.LearningRatePolicy;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.factory.LayerFactories;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.api.ConvexOptimizer;
import org.deeplearning4j.optimize.solvers.StochasticGradientDescent;
import org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction;
import org.junit.Before;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import org.nd4j.linalg.ops.transforms.Transforms;

import java.util.HashMap;
import java.util.Map;

import static org.junit.Assert.assertEquals;

/**
 * Test learning rate and momentum decay policies
 */


public class TestDecayPolicies {
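    // Shared fixtures: a 3x2 weight gradient and a 1x2 bias gradient of ones, reused by every
    // test, plus scratch fields (val, gradExpected, vPrev) and the tmpStorage..tmpStorage4 maps
    // that hold per-parameter state for the reference updater computations near the bottom of
    // this class. The scratch fields and maps are reset in beforeDo() before each test.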

    int nIn = 3;
    int nOut = 2;
    double epsilon = 1e-8;
    INDArray weightGradient = Nd4j.ones(nIn,nOut);
    INDArray biasGradient = Nd4j.ones(1,nOut);
    Gradient gradientSingle = new DefaultGradient();
    Gradient gradientMLN = new DefaultGradient();
    INDArray val, gradExpected, vPrev;
    String key;
    Map<String, INDArray> tmpStorage, tmpStorage2, tmpStorage3, tmpStorage4 = new HashMap<>();
    org.deeplearning4j.nn.conf.Updater[] updaters = {
            org.deeplearning4j.nn.conf.Updater.SGD,
            org.deeplearning4j.nn.conf.Updater.ADAGRAD,
            org.deeplearning4j.nn.conf.Updater.ADAM,
            org.deeplearning4j.nn.conf.Updater.RMSPROP,
    };

    @Before
    public void beforeDo(){
        int nLayers = 2;
        String wKey, bKey;

        gradientSingle.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
        gradientSingle.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

        for (int j=0; j < nLayers; j++){
            wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
            gradientMLN.setGradientFor(wKey, weightGradient.dup());
            bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY ;
            gradientMLN.setGradientFor(bKey, biasGradient.dup());
        }

        val = null;
        gradExpected = null;
        vPrev = null;
        tmpStorage = new HashMap<>();
        tmpStorage2 = new HashMap<>();
        tmpStorage3 = new HashMap<>();
        tmpStorage4 = new HashMap<>();

    }

    @Test
    public void testLearningRateExponentialDecaySingleLayer() {
        int iterations = 2;

        double lr = 1e-2;
        double decayRate = 2;
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Exponential)
                .lrPolicyDecayRate(decayRate)
                .iterations(iterations)
                .layer(new DenseLayer.Builder()
                        .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .build();

        int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);

        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
        for (int i = 0; i < iterations; i++) {
            updater.update(layer, gradientActual, i, 1);
            double expectedLr = calcExponentialDecay(lr, decayRate, i);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
        }
    }


    @Test
    public void testLearningRateInverseDecaySingleLayer() {
        int iterations = 2;

        double lr = 1e-2;
        double decayRate = 2;
        double power = 3;
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Inverse)
                .lrPolicyDecayRate(decayRate)
                .lrPolicyPower(power)
                .iterations(iterations)
                .layer(new DenseLayer.Builder()
                        .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .build();

        int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);

        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

        for (int i = 0; i < iterations; i++) {
            updater.update(layer, gradientActual, i, 1);
            double expectedLr = calcInverseDecay(lr, decayRate, i, power);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
        }
    }

    @Test
    public void testLearningRateStepDecaySingleLayer() {
        int iterations = 2;

        double lr = 1e-2;
        double decayRate = 2;
        double steps = 3;
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Step)
                .lrPolicyDecayRate(decayRate)
                .lrPolicySteps(steps)
                .iterations(iterations)
                .layer(new DenseLayer.Builder()
                        .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .build();

        int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);

        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

        for (int i = 0; i < iterations; i++) {
            updater.update(layer, gradientActual, i, 1);
            double expectedLr = calcStepDecay(lr, decayRate, i, steps);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
        }
    }


    @Test
    public void testLearningRatePolyDecaySingleLayer() {
        int iterations = 2;
        double lr = 1e-2;
        double power = 3;
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Poly)
                .lrPolicyPower(power)
                .iterations(iterations)
                .layer(new DenseLayer.Builder()
                        .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .build();

        int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);

        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

        for (int i = 0; i < iterations; i++) {
            updater.update(layer, gradientActual, i, 1);
            double expectedLr = calcPolyDecay(lr, i, power, iterations);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
        }
    }


    @Test
    public void testLearningRateSigmoidDecaySingleLayer() {
        int iterations = 2;
        double lr = 1e-2;
        double decayRate = 2;
        double steps = 3;

        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Sigmoid)
                .lrPolicyDecayRate(decayRate)
                .lrPolicySteps(steps)
                .iterations(iterations)
                .layer(new DenseLayer.Builder()
                        .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .build();

        int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0,params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);

        Gradient gradientActual = new DefaultGradient();
        gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
        gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

        for (int i = 0; i < iterations; i++) {
            updater.update(layer, gradientActual, i, 1);
            double expectedLr = calcSigmoidDecay(layer.conf().getLearningRateByParam("W"), decayRate, i, steps);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
            assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
        }
    }


    @Test
    public void testLearningRateScheduleSingleLayer() {
        Map<Integer, Double> learningRateAfter = new HashMap<>();
        learningRateAfter.put(1, 0.2);
        int iterations = 2;

        for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
            double lr = 1e-2;
            NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr).learningRateSchedule(learningRateAfter)
                    .learningRateDecayPolicy(LearningRatePolicy.Schedule)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder()
                            .nIn(nIn).nOut(nOut).updater(updaterFunc).build())
                    .build();

            int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
            INDArray params = Nd4j.create(1, numParams);
            Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
            Updater updater = UpdaterCreator.getUpdater(layer);

            Gradient gradientActual = new DefaultGradient();
            gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
            gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

            Gradient gradientExpected = new DefaultGradient();
            gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
            gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

            for (int i = 0; i < 2; i++) {
                updater.update(layer, gradientActual, i, 1);

                if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
                    lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                else if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
                    lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                else if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
                    lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                else if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
                    lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                assertEquals(lr, layer.conf().getLearningRateByParam("W"), 1e-4);
            }
        }
    }


    @Test
    public void testLearningRateScheduleMLN(){
        Map<Integer,Double> learningRateAfter = new HashMap<>();
        learningRateAfter.put(1, 0.2);
        int iterations = 2;
        int[] nIns = {4,2};
        int[] nOuts = {2,3};

        for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
            double lr = 1e-2;

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr).learningRateDecayPolicy(LearningRatePolicy.Schedule)
                    .learningRateSchedule(learningRateAfter).iterations(iterations)
                    .updater(updaterFunc)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build())
                    .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).build())
                    .backprop(true).pretrain(false)
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            Updater updater = UpdaterCreator.getUpdater(net);
            String wKey, bKey;

            for (int i = 0; i < 2; i++) {
                Gradient gradientActual = new DefaultGradient();
                Gradient gradientExpected = new DefaultGradient();
                for (int k = 0; k < net.getnLayers(); k++) {
                    wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
                    gradientActual.setGradientFor(wKey, weightGradient.dup());
                    gradientExpected.setGradientFor(wKey, weightGradient.dup());
                    bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
                    gradientActual.setGradientFor(bKey, biasGradient.dup());
                    gradientExpected.setGradientFor(bKey, biasGradient.dup());
                }

                updater.update(net, gradientActual, i, 1);
                if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
                    lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                else if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
                    lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                else if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
                    lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
                else if(updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
                    lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);

                if(i == 0) assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), lr);
                else assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), learningRateAfter.get(1));
            }
        }
    }

    @Test
    public void testLearningRateScoreDecay(){
        double lr = 0.01;
        double lrScoreDecay = 0.10;
        int[] nIns = {4,2};
        int[] nOuts = {2,3};
        int oldScore = 1;
        int newScore = 1;
        int iteration = 3;
        INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]);

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Score)
                .lrPolicyDecayRate(lrScoreDecay)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                .backprop(true).pretrain(false)
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(), new NegativeDefaultStepFunction(), null, net);
        opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration);
        assertEquals(lrScoreDecay, net.getLayer(0).conf().getLrPolicyDecayRate(), 1e-4);
        assertEquals(lr*(lrScoreDecay + Nd4j.EPS_THRESHOLD), net.getLayer(0).conf().getLearningRateByParam("W"), 1e-4);

    }

    @Test
    public void testOriginalLearningRateUnchanged() {
        // Confirm learning rate is unchanged while hash is updated

        DataSet ds = new IrisDataSetIterator(150,150).next();
        ds.normalizeZeroMeanZeroUnitVariance();

        Nd4j.getRandom().setSeed(12345);

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .regularization(false)
                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                .learningRate(1.0)
                .learningRateDecayPolicy(LearningRatePolicy.Score)
                .lrPolicyDecayRate(0.10)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                .seed(12345L)
                .list()
                .layer(0, new DenseLayer.Builder()
                        .nIn(4).nOut(3)
                        .activation("sigmoid")
                        .build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                        .activation("tanh")
                        .nIn(3).nOut(3)
                        .build())
                .pretrain(false).backprop(true)
                .build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();

        //Run a number of iterations of learning
        mln.setInput(ds.getFeatureMatrix());
        mln.setLabels(ds.getLabels());
        mln.computeGradientAndScore();
        for( int j=0; j<1; j++ ) mln.fit(ds);
        mln.computeGradientAndScore();

        double lr0 = mln.getLayer(0).conf().getLayer().getLearningRate();
        double lr1 = mln.getLayer(1).conf().getLayer().getLearningRate();
        assertEquals(1.0, lr0, 0.0);
        assertEquals(1.0, lr1, 0.0);
    }

    @Test
    public void testMomentumScheduleSingleLayer(){
        double lr = 1e-2;
        double mu = 0.6;
        Map<Integer,Double> momentumAfter = new HashMap<>();
        momentumAfter.put(1, 0.2);
        int iterations = 2;

        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr).momentum(mu)
                .momentumAfter(momentumAfter).iterations(iterations)
                .layer(new DenseLayer.Builder()
                        .nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                .build();

        int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf,true);
        INDArray params = Nd4j.create(1, numParams);
        Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
        Updater updater = UpdaterCreator.getUpdater(layer);

        Gradient gradientExpected = new DefaultGradient();
        gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
        gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

        for (int i = 0; i < 2; i++) {
            updater.update(layer, gradientSingle, i, 1);
            mu = testNesterovsComputation(gradientSingle, gradientExpected, lr, mu, momentumAfter, i);
            assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
        }
    }

    @Test
    public void testMomentumScheduleMLN(){
        double lr = 1e-2;
        double mu = 0.6;
        Map<Integer,Double> momentumAfter = new HashMap<>();
        momentumAfter.put(1, 0.2);
        int iterations = 2;
        int[] nIns = {4,2};
        int[] nOuts = {2,3};

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr).momentum(mu).momentumAfter(momentumAfter).iterations(iterations)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                .backprop(true).pretrain(false)
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        Updater updater = UpdaterCreator.getUpdater(net);

        String wKey, bKey;

        Gradient gradientExpected = new DefaultGradient();
        for (int k=0; k < net.getnLayers(); k++){
            wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
            gradientExpected.setGradientFor(wKey, weightGradient.dup());
            bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY ;
            gradientExpected.setGradientFor(bKey, biasGradient.dup());
        }

        for (int i = 0; i < 2; i++) {
            updater.update(net, gradientMLN, i, 1);
            mu = testNesterovsComputation(gradientMLN, gradientExpected, lr, mu, momentumAfter, i);
            assertEquals(mu, net.getLayer(1).conf().getLayer().getMomentum(), 1e-4);
        }
    }


    @Test
    public void testUpdatingInConf() throws Exception {

        OptimizationAlgorithm[] optimizationAlgorithms = new OptimizationAlgorithm[]{
                OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT,
                OptimizationAlgorithm.LINE_GRADIENT_DESCENT,
                OptimizationAlgorithm.CONJUGATE_GRADIENT,
                OptimizationAlgorithm.LBFGS};

        for(OptimizationAlgorithm oa : optimizationAlgorithms ) {
            Map<Integer, Double> momentumSchedule = new HashMap<>();
            double m = 0.001;
            for (int i = 0; i <= 100; i++) {
                momentumSchedule.put(i, Math.min(m, 0.9999));
                m += 0.001;
            }

            Map<Integer, Double> learningRateSchedule = new HashMap<>();
            double lr = 0.1;
            for (int i = 0; i <= 100; i++) {
                learningRateSchedule.put(i, lr);
                lr *= 0.96;
            }

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(oa)
                    .iterations(1)
                    .learningRateDecayPolicy(LearningRatePolicy.Schedule)
                    .learningRateSchedule(learningRateSchedule)
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS)
                    .momentum(0.9)
                    .momentumAfter(momentumSchedule)
                    .regularization(true).l2(0.0001)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(784).nOut(10).build())
                    .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build())
                    .pretrain(false).backprop(true)
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            int last_layer_index = 1;

            DataSetIterator trainIter = new MnistDataSetIterator(64, true, 12345);


            int count = 0;
            while (trainIter.hasNext()) {
                net.fit(trainIter.next());

                // the per-iteration schedule values should be reflected in the layer configuration
                double lrLastLayer = (net.getLayer(last_layer_index)).conf().getLearningRateByParam("W");
                double mLastLayer = (net.getLayer(last_layer_index)).conf().getLayer().getMomentum();

                assertEquals(learningRateSchedule.get(count), lrLastLayer, 1e-6);
                assertEquals(momentumSchedule.get(count), mLastLayer, 1e-6);

                if (count++ >= 100) break;
            }
        }
    }

///// Updater Calculations
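    // Reference update rules reproduced by the helpers below (g = raw gradient,
    // lr = learning rate after any schedule, eps = this.epsilon):
    //   SGD:      update = lr * g
    //   Nesterov: v_t = mu * v_{t-1} - lr * g;   update = mu * v_{t-1} - (1 + mu) * v_t
    //   AdaGrad:  h += g^2;                      update = lr / sqrt(h + eps) * g
    //   Adam:     m = b1*m + (1-b1)*g;  v = b2*v + (1-b2)*g^2;
    //             alpha_t = lr * sqrt(1 - b2^t) / (1 - b1^t);   update = alpha_t * m / (sqrt(v) + eps)
    //   RMSProp:  r = rmsDecay*r + (1-rmsDecay)*g^2;            update = lr * g / sqrt(r + eps)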

    public double testSGDComputation(Gradient gradientActual, Gradient gradientExpected, double lr, Map<Integer, Double> learningRateAfter, int i){
        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
            if (learningRateAfter != null)
                lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
            key = entry.getKey();
            val = entry.getValue();
            gradExpected = val.mul(lr);
            gradientExpected.setGradientFor(key, gradExpected);
            INDArray act = gradientActual.getGradientFor(key);
            assertEquals(gradExpected, act);
        }
        return lr;
    }

    public double testNesterovsComputation(Gradient gradientActual, Gradient gradientExpected, double lr, double mu, Map<Integer, Double> momentumAfter, int i) {

        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
            if(momentumAfter !=null)
                mu = (momentumAfter.containsKey(i)) ? momentumAfter.get(i) : mu;
            key = entry.getKey();
            val = entry.getValue();
            INDArray vTmp = tmpStorage.get(key);

            if(vTmp == null)
                vTmp = Nd4j.zeros(val.shape());
            vPrev = vTmp;
            vTmp = vPrev.mul(mu).subi(val.mul(lr));
            gradExpected = vPrev.muli(mu).addi(vTmp.mul(-mu - 1));
            gradientExpected.setGradientFor(key, gradExpected);

            INDArray act = gradientActual.getGradientFor(entry.getKey());
            assertEquals(gradExpected, act);
            tmpStorage.put(key, vTmp);
        }
        return mu;
    }


    public double testAdaGradComputation(Gradient gradientActual, Gradient gradientExpected, double lr, Map<Integer, Double> learningRateAfter, int i) {

        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
            if (learningRateAfter != null)
                lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
            key = entry.getKey();
            val = entry.getValue();
            INDArray historicalGradient = tmpStorage.get(key);

            if(historicalGradient == null) historicalGradient = val.mul(val);
            else historicalGradient.addi(val.mul(val));

            gradExpected = Transforms.sqrt(historicalGradient.add(epsilon)).rdiv(lr).mul(val);
            assertEquals(gradExpected, gradientActual.getGradientFor(key));
            gradientExpected.setGradientFor(key, gradExpected);
            tmpStorage.put(key, historicalGradient);
        }

        return lr;
    }

    public double testAdamComputation(Gradient gradientActual, Gradient gradientExpected, double lr, Map<Integer, Double> learningRateAfter, int i) {
        double beta1 = 0.9;
        double beta2 = 0.999;

        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
            if (learningRateAfter != null)
                lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
            key = entry.getKey();
            val = entry.getValue();

            INDArray mTmp = tmpStorage2.get(key);
            INDArray vTmp = tmpStorage3.get(key);

            if(mTmp == null) mTmp = Nd4j.zeros(val.shape());
            if(vTmp == null) vTmp = Nd4j.zeros(val.shape());

            mTmp.muli(beta1).addi(val.mul(1.0-beta1));
            vTmp.muli(beta2).addi(val.mul(val).mul(1.0-beta2));

            double beta1t = FastMath.pow(beta1, i);
            double beta2t = FastMath.pow(beta2, i);
            double alphat = lr * FastMath.sqrt(1-beta2t)/(1-beta1t);
            if (Double.isNaN(alphat) || alphat == 0.0) alphat = epsilon;

            gradExpected = mTmp.mul(alphat).divi(Transforms.sqrt(vTmp).addi(epsilon));
            gradientExpected.setGradientFor(key, gradExpected);
            assertEquals(gradExpected, gradientActual.getGradientFor(key));

            tmpStorage2.put(key, mTmp);
            tmpStorage3.put(key, vTmp);
        }
        return lr;
    }

    public double testRMSPropComputation(Gradient gradientActual, Gradient gradientExpected, double lr, Map<Integer, Double> learningRateAfter, int i) {
        double rmsDecay = 0.95;
        double epsilon = 1e-8;

        for (Map.Entry<String, INDArray> entry : gradientExpected.gradientForVariable().entrySet()) {
            if (learningRateAfter != null)
                lr = (learningRateAfter.containsKey(i)) ? learningRateAfter.get(i) : lr;
            key = entry.getKey();
            val = entry.getValue();
            INDArray lastGTmp = tmpStorage4.get(key);

            if(lastGTmp==null)
                lastGTmp = Nd4j.zeros(val.shape());

            lastGTmp.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
            gradExpected = val.mul(lr).div(Transforms.sqrt(lastGTmp.add(epsilon)));
            gradientExpected.setGradientFor(key, gradExpected);

            assertEquals(gradExpected, gradientActual.getGradientFor(key));
            tmpStorage4.put(key, lastGTmp);
        }

        return lr;
    }

    ///// Learning Rate Decay Policy Calculations
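    // Formulas behind each LearningRatePolicy exercised above (i = iteration):
    //   Exponential: lr * decayRate^i
    //   Inverse:     lr / (1 + decayRate * i)^power
    //   Step:        lr * decayRate^floor(i / steps)
    //   Poly:        lr * (1 - i / maxIterations)^power
    //   Sigmoid:     lr / (1 + exp(-decayRate * (i - steps)))
    // Worked example (Step policy with lr = 1e-2, decayRate = 2, steps = 3, as in
    // testLearningRateStepDecaySingleLayer): iterations 0-2 give 0.01, 3-5 give 0.02, 6-8 give 0.04.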

    public double calcExponentialDecay(double lr, double decayRate, double iteration){
        return lr * Math.pow(decayRate, iteration);
    }

    public double calcInverseDecay(double lr, double decayRate, double iteration, double power){
        return lr / Math.pow((1+decayRate * iteration), power);
    }

    public double calcStepDecay(double lr, double decayRate, double iteration, double steps){
        return lr * Math.pow(decayRate, Math.floor(iteration/steps));
    }

    public double calcPolyDecay(double lr, double iteration, double power, double maxIterations){
        return lr * Math.pow((1 - iteration/maxIterations), power);
    }
    public double calcSigmoidDecay(double lr, double decayRate, double iteration, double steps){
        return lr / (1 + Math.exp(-decayRate * (iteration - steps)));
    }

}
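
If you are using this test as a learning example, the builder calls it exercises can also be used directly when configuring a network. The snippet below is a minimal sketch based on the same (0.4.x-era) deeplearning4j API that appears in the test above; the class name StepDecayExample and the layer sizes are arbitrary choices for illustration, while the builder methods (learningRateDecayPolicy, lrPolicyDecayRate, lrPolicySteps) and getLearningRateByParam are taken verbatim from the test code.

import org.deeplearning4j.nn.conf.LearningRatePolicy;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;

public class StepDecayExample {

    public static void main(String[] args) {
        double lr = 1e-2;      // initial learning rate
        double decayRate = 2;  // multiplier applied at each step boundary
        double steps = 3;      // decay every 3 iterations

        // Two-layer network using the Step learning rate policy, assembled from the
        // same builder calls as testLearningRateStepDecaySingleLayer above.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .learningRate(lr)
                .learningRateDecayPolicy(LearningRatePolicy.Step)
                .lrPolicyDecayRate(decayRate)
                .lrPolicySteps(steps)
                .iterations(1)
                .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
                .layer(1, new OutputLayer.Builder().nIn(3).nOut(3).build())
                .backprop(true).pretrain(false)
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Before any training this prints the configured base rate; during training the
        // effective rate follows lr * decayRate^floor(iteration / steps), i.e. calcStepDecay().
        System.out.println(net.getLayer(0).conf().getLearningRateByParam("W"));
    }
}

With these values the effective learning rate doubles every three iterations: 0.01 for iterations 0-2, 0.02 for iterations 3-5, and so on, which is exactly what the step-decay test above asserts.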
