Java example source code file (BackPropMLPTest.java)
Here is the BackPropMLPTest.java Java example source code:

package org.deeplearning4j.nn.multilayer;

import static org.junit.Assert.*;

import java.util.Arrays;

import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.junit.Test;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class BackPropMLPTest {

    @Test
    public void testMLPTrivial() {
        //Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1.
        MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[]{1}, "sigmoid"));
        network.setListeners(new ScoreIterationListener(1));
        network.init();
        DataSetIterator iter = new IrisDataSetIterator(1, 10);
        while (iter.hasNext()) network.fit(iter.next());
    }

    @Test
    public void testMLP() {
        //Simple mini-batch test with multiple hidden layers
        MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[]{5, 4, 3}, "sigmoid");
        System.out.println(conf);
        MultiLayerNetwork network = new MultiLayerNetwork(conf);
        network.init();
        DataSetIterator iter = new IrisDataSetIterator(10, 100);
        while (iter.hasNext()) network.fit(iter.next());
    }

    @Test
    public void testMLP2() {
        //Simple mini-batch test with multiple hidden layers
        MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[]{5, 15, 3}, "tanh");
        System.out.println(conf);
        MultiLayerNetwork network = new MultiLayerNetwork(conf);
        network.init();
        DataSetIterator iter = new IrisDataSetIterator(12, 120);
        while (iter.hasNext()) network.fit(iter.next());
    }
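
    /*
     * The remaining tests verify DL4J's backprop arithmetic by recomputing it by hand.
     * In the notation used in the comments below:
     *   forward pass:  z = w*x + b,  a = sigma(z)
     *   output delta (softmax output + MCXENT loss):  deltaOut = out - labels
     *   hidden delta:  delta_j = sigmaPrime(z_j) * sum_k( w_jk * delta_k )
     *   gradients:     dL/dw = delta * (activation of prev. layer),  dL/db = delta
     */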

    @Test
    public void testSingleExampleWeightUpdates() {
        //Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1.
        //Manually calculate weight updates (entirely outside of DL4J and ND4J)
        // and compare expected and actual weights after backprop
        DataSetIterator iris = new IrisDataSetIterator(1, 10);
        MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[]{1}, "sigmoid"));
        network.init();
        Layer[] layers = network.getLayers();
        final boolean printCalculations = true;
        while (iris.hasNext()) {
            DataSet data = iris.next();
            INDArray x = data.getFeatureMatrix();
            INDArray y = data.getLabels();
            float[] xFloat = asFloat(x);
            float[] yFloat = asFloat(y);

            //Do forward pass:
            INDArray l1Weights = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Hidden layer
            INDArray l2Weights = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Output layer
            INDArray l1Bias = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            INDArray l2Bias = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            float[] l1WeightsFloat = asFloat(l1Weights);
            float[] l2WeightsFloat = asFloat(l2Weights);
            float l1BiasFloat = l1Bias.getFloat(0);
            float[] l2BiasFloatArray = asFloat(l2Bias);

            float hiddenUnitPreSigmoid = dotProduct(l1WeightsFloat, xFloat) + l1BiasFloat; //z = w*x + b
            float hiddenUnitPostSigmoid = sigmoid(hiddenUnitPreSigmoid); //a = sigma(z)

            float[] outputPreSoftmax = new float[3];
            //Normally a matrix multiplication here, but only one hidden unit in this trivial example
            for (int i = 0; i < 3; i++) {
                outputPreSoftmax[i] = hiddenUnitPostSigmoid * l2WeightsFloat[i] + l2BiasFloatArray[i];
            }
            float[] outputPostSoftmax = softmax(outputPreSoftmax);

            //Do backward pass:
            float[] deltaOut = vectorDifference(outputPostSoftmax, yFloat); //out - labels

            //deltaHidden = sigmaPrime(hiddenUnitZ) * sum_k (w_jk * delta_k); here, only one j
            float deltaHidden = 0.0f;
            for (int i = 0; i < 3; i++) deltaHidden += l2WeightsFloat[i] * deltaOut[i];
            deltaHidden *= derivOfSigmoid(hiddenUnitPreSigmoid);

            //Calculate weight/bias updates:
            //dL/dw = delta * (activation of prev. layer)
            //dL/db = delta
            float[] dLdwOut = new float[3];
            for (int i = 0; i < dLdwOut.length; i++) dLdwOut[i] = deltaOut[i] * hiddenUnitPostSigmoid;
            float[] dLdwHidden = new float[4];
            for (int i = 0; i < dLdwHidden.length; i++) dLdwHidden[i] = deltaHidden * xFloat[i];
            float[] dLdbOut = deltaOut;
            float dLdbHidden = deltaHidden;

            if (printCalculations) {
                System.out.println("deltaOut = " + Arrays.toString(deltaOut));
                System.out.println("deltaHidden = " + deltaHidden);
                System.out.println("dLdwOut = " + Arrays.toString(dLdwOut));
                System.out.println("dLdbOut = " + Arrays.toString(dLdbOut));
                System.out.println("dLdwHidden = " + Arrays.toString(dLdwHidden));
                System.out.println("dLdbHidden = " + dLdbHidden);
            }

            //Calculate new parameters:
            //w_i = w_i - (learningRate)/(batchSize) * sum_j (dL_j/dw_i)
            //b_i = b_i - (learningRate)/(batchSize) * sum_j (dL_j/db_i)
            //Which for batch size of one (here) is simply:
            //w_i = w_i - learningRate * dL/dw
            //b_i = b_i - learningRate * dL/db
            float[] expectedL1WeightsAfter = new float[4];
            float[] expectedL2WeightsAfter = new float[3];
            float expectedL1BiasAfter = l1BiasFloat - 0.1f * dLdbHidden;
            float[] expectedL2BiasAfter = new float[3];
            for (int i = 0; i < 4; i++) expectedL1WeightsAfter[i] = l1WeightsFloat[i] - 0.1f * dLdwHidden[i];
            for (int i = 0; i < 3; i++) expectedL2WeightsAfter[i] = l2WeightsFloat[i] - 0.1f * dLdwOut[i];
            for (int i = 0; i < 3; i++) expectedL2BiasAfter[i] = l2BiasFloatArray[i] - 0.1f * dLdbOut[i];
            //Finally, do back-prop on network, and compare parameters vs. expected parameters
            network.fit(data);
            /*
            INDArray l1WeightsAfter = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Hidden layer
            INDArray l2WeightsAfter = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Output layer
            INDArray l1BiasAfter = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            INDArray l2BiasAfter = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            float[] l1WeightsFloatAfter = asFloat(l1WeightsAfter);
            float[] l2WeightsFloatAfter = asFloat(l2WeightsAfter);
            float l1BiasFloatAfter = l1BiasAfter.getFloat(0);
            float[] l2BiasFloatAfter = asFloat(l2BiasAfter);
            if (printCalculations) {
                System.out.println("Expected L1 weights = " + Arrays.toString(expectedL1WeightsAfter));
                System.out.println("Actual L1 weights = " + Arrays.toString(asFloat(l1WeightsAfter)));
                System.out.println("Expected L2 weights = " + Arrays.toString(expectedL2WeightsAfter));
                System.out.println("Actual L2 weights = " + Arrays.toString(asFloat(l2WeightsAfter)));
                System.out.println("Expected L1 bias = " + expectedL1BiasAfter);
                System.out.println("Actual L1 bias = " + Arrays.toString(asFloat(l1BiasAfter)));
                System.out.println("Expected L2 bias = " + Arrays.toString(expectedL2BiasAfter));
                System.out.println("Actual L2 bias = " + Arrays.toString(asFloat(l2BiasAfter)));
            }
            float eps = 1e-4f;
            assertArrayEquals(l1WeightsFloatAfter, expectedL1WeightsAfter, eps);
            assertArrayEquals(l2WeightsFloatAfter, expectedL2WeightsAfter, eps);
            assertEquals(l1BiasFloatAfter, expectedL1BiasAfter, eps);
            assertArrayEquals(l2BiasFloatAfter, expectedL2BiasAfter, eps);
            */
            System.out.println("\n\n--------------");
        }
    }

    @Test
    public void testMLPGradientCalculation() {
        testIrisMiniBatchGradients(1, new int[]{1}, "sigmoid");
        testIrisMiniBatchGradients(1, new int[]{5}, "sigmoid");
        testIrisMiniBatchGradients(12, new int[]{15, 25, 10}, "sigmoid");
        testIrisMiniBatchGradients(50, new int[]{10, 50, 200, 50, 10}, "tanh");
        testIrisMiniBatchGradients(150, new int[]{30, 50, 20}, "tanh");
    }

    private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLayerSizes, String activationFunction) {
        int totalExamples = 10 * miniBatchSize;
        if (totalExamples > 150) {
            totalExamples = miniBatchSize * (150 / miniBatchSize);
        }
        if (miniBatchSize > 150) fail();
        DataSetIterator iris = new IrisDataSetIterator(miniBatchSize, totalExamples);

        //NOTE: the activationFunction argument is not used when building the network here;
        //the config is always built with sigmoid so that it matches the manual
        //sigmoid-based calculation below.
        MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(hiddenLayerSizes, "sigmoid"));
        network.init();
        Layer[] layers = network.getLayers();
        int nLayers = layers.length;

        while (iris.hasNext()) {
            DataSet data = iris.next();
            INDArray x = data.getFeatureMatrix();
            INDArray y = data.getLabels();

            //Do forward pass:
            INDArray[] layerWeights = new INDArray[nLayers];
            INDArray[] layerBiases = new INDArray[nLayers];
            for (int i = 0; i < nLayers; i++) {
                layerWeights[i] = layers[i].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
                layerBiases[i] = layers[i].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            }
            INDArray[] layerZs = new INDArray[nLayers];
            INDArray[] layerActivations = new INDArray[nLayers];
            for (int i = 0; i < nLayers; i++) {
                INDArray layerInput = (i == 0 ? x : layerActivations[i - 1]);
                layerZs[i] = layerInput.mmul(layerWeights[i]).addiRowVector(layerBiases[i]);
                layerActivations[i] = (i == nLayers - 1 ? doSoftmax(layerZs[i].dup()) : doSigmoid(layerZs[i].dup()));
            }
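
            /*
             * Backward pass in matrix form, one training example per row
             * (shapes as asserted below):
             *   delta_L = a_L - y                                       shape [miniBatchSize, nOut]
             *   delta_l = (delta_{l+1} . W_{l+1}^T) * sigmaPrime(z_l)   (elementwise multiply)
             *   dL/dW_l = a_{l-1}^T . delta_l                           shape [nIn, nOut]
             *   dL/db_l = column sums of delta_l                        shape [1, nOut]
             */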
            //Do backward pass:
            INDArray[] deltas = new INDArray[nLayers];
            deltas[nLayers - 1] = layerActivations[nLayers - 1].sub(y); //Out - labels; shape=[miniBatchSize,nOut]
            assertArrayEquals(deltas[nLayers - 1].shape(), new int[]{miniBatchSize, 3});
            for (int i = nLayers - 2; i >= 0; i--) {
                INDArray sigmaPrimeOfZ = doSigmoidDerivative(layerZs[i]);
                INDArray epsilon = layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose();
                deltas[i] = epsilon.mul(sigmaPrimeOfZ);
                assertArrayEquals(deltas[i].shape(), new int[]{miniBatchSize, hiddenLayerSizes[i]});
            }

            INDArray[] dLdw = new INDArray[nLayers];
            INDArray[] dLdb = new INDArray[nLayers];
            for (int i = 0; i < nLayers; i++) {
                INDArray prevActivations = (i == 0 ? x : layerActivations[i - 1]);
                //Raw gradients, so not yet divided by mini-batch size (division is done in BaseUpdater)
                dLdw[i] = deltas[i].transpose().mmul(prevActivations).transpose(); //Shape: [nIn, nOut]
                dLdb[i] = deltas[i].sum(0); //Shape: [1, nOut]
                int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
                int nOut = (i < nLayers - 1 ? hiddenLayerSizes[i] : 3);
                assertArrayEquals(dLdw[i].shape(), new int[]{nIn, nOut});
                assertArrayEquals(dLdb[i].shape(), new int[]{1, nOut});
            }

            //Calculate and get gradient, compare to expected
            network.setInput(x);
            network.setLabels(y);
            network.computeGradientAndScore();
            Gradient gradient = network.gradientAndScore().getFirst();

            float eps = 1e-4f;
            for (int i = 0; i < hiddenLayerSizes.length; i++) {
                String wKey = i + "_" + DefaultParamInitializer.WEIGHT_KEY;
                String bKey = i + "_" + DefaultParamInitializer.BIAS_KEY;
                INDArray wGrad = gradient.getGradientFor(wKey);
                INDArray bGrad = gradient.getGradientFor(bKey);
                float[] wGradf = asFloat(wGrad);
                float[] bGradf = asFloat(bGrad);
                float[] expWGradf = asFloat(dLdw[i]);
                float[] expBGradf = asFloat(dLdb[i]);
                assertArrayEquals(wGradf, expWGradf, eps);
                assertArrayEquals(bGradf, expBGradf, eps);
            }
        }
    }

    /** Very simple back-prop config set up for Iris.
     * Learning Rate = 0.1
     * No regularization, no Adagrad, no momentum etc. One iteration.
     */
    private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes, String activationFunction) {
        NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder()
                .iterations(1)
                .learningRate(0.1).updater(Updater.SGD)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .regularization(false)
                .seed(12345L)
                .list();
        for (int i = 0; i < hiddenLayerSizes.length; i++) {
            int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
            lb.layer(i, new DenseLayer.Builder()
                    .nIn(nIn).nOut(hiddenLayerSizes[i])
                    .weightInit(WeightInit.XAVIER)
                    .updater(Updater.SGD)
                    .activation(activationFunction)
                    .build());
        }
        lb.layer(hiddenLayerSizes.length, new OutputLayer.Builder(LossFunction.MCXENT)
                .nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1]).nOut(3)
                .weightInit(WeightInit.XAVIER)
                .updater(Updater.SGD)
                .activation(activationFunction.equals("identity") ? "identity" : "softmax")
                .build());
        lb.pretrain(false).backprop(true);
        return lb.build();
    }
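
    /*
     * Note: the 0.1f factor in the hand-calculated updates above corresponds to the
     * learningRate(0.1) / Updater.SGD setting in this config; with a batch size of 1
     * and one iteration this reduces to w = w - 0.1 * dL/dw, as described in the
     * comments in testSingleExampleWeightUpdates.
     */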
"identity" : "softmax") .build()); lb.pretrain(false).backprop(true); return lb.build(); } public static float[] asFloat( INDArray arr) { int len = arr.length(); float[] f = new float[len]; NdIndexIterator iterator = new NdIndexIterator('c',arr.shape()); for( int i = 0; i < len; i++ ) { f[i] = arr.getFloat(iterator.next()); } return f; } public static float dotProduct( float[] x, float[] y) { float sum = 0.0f; for( int i = 0; i < x.length; i++ ) sum += x[i]*y[i]; return sum; } public static float sigmoid( float in) { return (float)(1.0 / (1.0 + Math.exp(-in))); } public static float[] sigmoid(float[] in) { float[] out = new float[in.length]; for( int i=0; i<in.length; i++ ) out[i] = sigmoid(in[i]); return out; } public static float derivOfSigmoid( float in) { // float v = (float)( Math.exp(in) / Math.pow(1+Math.exp(in),2.0) ); float v = in * (1-in); return v; } public static float[] derivOfSigmoid( float[] in) { float[] out = new float[in.length]; for( int i=0; i<in.length; i++ ) out[i] = derivOfSigmoid(in[i]); return out; } public static float[] softmax( float[] in) { float[] out = new float[in.length]; float sumExp = 0.0f; for( int i=0; i<in.length; i++ ) sumExp += Math.exp(in[i]); for( int i=0; i<in.length; i++ ) out[i] = (float)Math.exp(in[i])/sumExp; return out; } public static float[] vectorDifference(float[] x, float[] y){ float[] out = new float[x.length]; for( int i=0; i<x.length; i++ ) out[i] = x[i]-y[i]; return out; } public static INDArray doTanh(INDArray input){ return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("tanh", input.dup())); } public static INDArray doTanhDerivative(INDArray input){ return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("tanh", input.dup()).derivative()); } public static INDArray doSoftmax(INDArray input) { return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", input.dup())); } public static INDArray doSigmoid(INDArray input) { return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", input.dup())); } public static INDArray doSigmoidDerivative(INDArray input){ return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("sigmoid", input).derivative()); } } Other Java examples (source code examples)Here is a short list of links related to this Java BackPropMLPTest.java source code file: |