hama-commits mailing list archives

From yxji...@apache.org
Subject svn commit: r1513873 [2/2] - in /hama/trunk: ./ ml/src/main/java/org/apache/hama/ml/ann/ ml/src/main/java/org/apache/hama/ml/math/ ml/src/main/java/org/apache/hama/ml/perception/ ml/src/main/java/org/apache/hama/ml/regression/ ml/src/test/java/org/apac...
Date Wed, 14 Aug 2013 13:27:19 GMT
Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java?rev=1513873&view=auto
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java (added)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java Wed Aug 14 13:27:18 2013
@@ -0,0 +1,542 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.ml.ann;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.TrainingMethod;
+import org.apache.hama.ml.math.DenseDoubleMatrix;
+import org.apache.hama.ml.math.DenseDoubleVector;
+import org.apache.hama.ml.math.DoubleMatrix;
+import org.apache.hama.ml.math.DoubleVector;
+import org.apache.hama.ml.math.FunctionFactory;
+import org.apache.hama.ml.writable.VectorWritable;
+import org.junit.Test;
+import org.mortbay.log.Log;
+
+/**
+ * Test the functionality of SmallLayeredNeuralNetwork.
+ * 
+ */
+public class TestSmallLayeredNeuralNetwork {
+
+  @Test
+  public void testReadWrite() {
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.addLayer(2, false,
+        FunctionFactory.createDoubleFunction("IdentityFunction"));
+    ann.addLayer(5, false,
+        FunctionFactory.createDoubleFunction("IdentityFunction"));
+    ann.addLayer(1, true,
+        FunctionFactory.createDoubleFunction("IdentityFunction"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    double learningRate = 0.2;
+    ann.setLearningRate(learningRate);
+    double momentumWeight = 0.5;
+    ann.setMomemtumWeight(momentumWeight);
+    double regularizationWeight = 0.05;
+    ann.setRegularizationWeight(regularizationWeight);
+    // intentionally initialize the weights to fixed values (0.2 and 0.8)
+    DoubleMatrix[] matrices = new DenseDoubleMatrix[2];
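+    // each weight matrix is (next layer size) x (previous layer size + 1);
+    // the extra column carries the bias weights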
+    matrices[0] = new DenseDoubleMatrix(5, 3, 0.2);
+    matrices[1] = new DenseDoubleMatrix(1, 6, 0.8);
+    ann.setWeightMatrices(matrices);
+
+    // write to file
+    String modelPath = "/tmp/testSmallLayeredNeuralNetworkReadWrite";
+    ann.setModelPath(modelPath);
+    try {
+      ann.writeModelToFile();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+    // read from file
+    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
+    assertEquals(annCopy.getClass().getSimpleName(), annCopy.getModelType());
+    assertEquals(modelPath, annCopy.getModelPath());
+    assertEquals(learningRate, annCopy.getLearningRate(), 0.000001);
+    assertEquals(momentumWeight, annCopy.getMomemtumWeight(), 0.000001);
+    assertEquals(regularizationWeight, annCopy.getRegularizationWeight(),
+        0.000001);
+    assertEquals(TrainingMethod.GRADIATE_DESCENT, annCopy.getTrainingMethod());
+
+    // compare weights
+    DoubleMatrix[] weightsMatrices = annCopy.getWeightMatrices();
+    for (int i = 0; i < weightsMatrices.length; ++i) {
+      DoubleMatrix expectMat = matrices[i];
+      DoubleMatrix actualMat = weightsMatrices[i];
+      for (int j = 0; j < expectMat.getRowCount(); ++j) {
+        for (int k = 0; k < expectMat.getColumnCount(); ++k) {
+          assertEquals(expectMat.get(j, k), actualMat.get(j, k), 0.000001);
+        }
+      }
+    }
+  }
+
+  /**
+   * Test the forward functionality.
+   */
+  @Test
+  public void testOutput() {
+    // first network
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.addLayer(2, false,
+        FunctionFactory.createDoubleFunction("IdentityFunction"));
+    ann.addLayer(5, false,
+        FunctionFactory.createDoubleFunction("IdentityFunction"));
+    ann.addLayer(1, true,
+        FunctionFactory.createDoubleFunction("IdentityFunction"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    ann.setLearningRate(0.1);
+    // intentionally initialize all weights to 0.5
+    DoubleMatrix[] matrices = new DenseDoubleMatrix[2];
+    matrices[0] = new DenseDoubleMatrix(5, 3, 0.5);
+    matrices[1] = new DenseDoubleMatrix(1, 6, 0.5);
+    ann.setWeightMatrices(matrices);
+
+    double[] arr = new double[] { 0, 1 };
+    DoubleVector training = new DenseDoubleVector(arr);
+    DoubleVector result = ann.getOutput(training);
+    assertEquals(1, result.getDimension());
+    // assertEquals(3, result.get(0), 0.000001);
+
+    // second network
+    SmallLayeredNeuralNetwork ann2 = new SmallLayeredNeuralNetwork();
+    ann2.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann2.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann2.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann2.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    ann2.setLearningRate(0.3);
+    // intentionally initialize all weights to 0.5
+    DoubleMatrix[] matrices2 = new DenseDoubleMatrix[2];
+    matrices2[0] = new DenseDoubleMatrix(3, 3, 0.5);
+    matrices2[1] = new DenseDoubleMatrix(1, 4, 0.5);
+    ann2.setWeightMatrices(matrices2);
+
+    double[] test = { 0, 0 };
+    double[] result2 = { 0.807476 };
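+    // with all weights 0.5 and input {0, 0}, each hidden unit outputs
+    // sigmoid(0.5) = 0.622459, so the output is
+    // sigmoid(0.5 * (1 + 3 * 0.622459)) = 0.807476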
+
+    DoubleVector vec = ann2.getOutput(new DenseDoubleVector(test));
+    assertArrayEquals(result2, vec.toArray(), 0.000001);
+
+    SmallLayeredNeuralNetwork ann3 = new SmallLayeredNeuralNetwork();
+    ann3.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann3.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann3.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann3.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    ann3.setLearningRate(0.3);
+    // intentionally initialize all weights to 0.5
+    DoubleMatrix[] initMatrices = new DenseDoubleMatrix[2];
+    initMatrices[0] = new DenseDoubleMatrix(3, 3, 0.5);
+    initMatrices[1] = new DenseDoubleMatrix(1, 4, 0.5);
+    ann3.setWeightMatrices(initMatrices);
+
+    double[] instance = { 0, 1 };
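+    // input {0, 1}: each hidden unit outputs sigmoid(0.5 * (1 + 0 + 1)) = 0.731059,
+    // so the output is sigmoid(0.5 * (1 + 3 * 0.731059)) = 0.831541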
+    DoubleVector output = ann3.getOutput(new DenseDoubleVector(instance));
+    assertEquals(0.8315410, output.get(0), 0.000001);
+  }
+
+  @Test
+  public void testXORlocal() {
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    ann.setLearningRate(0.5);
+    ann.setMomemtumWeight(0.0);
+
+    int iterations = 50000; // the iteration count must be large enough for the network to converge
+    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
+    for (int i = 0; i < iterations; ++i) {
+      for (int j = 0; j < instances.length; ++j) {
+        DoubleMatrix[] matrices = ann
+            .trainByInstance(new DenseDoubleVector(instances[j]));
+        ann.updateWeightMatrices(matrices);
+      }
+    }
+
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
+      // the expected output is the last element in array
+      double result = instances[i][2];
+      assertEquals(result, ann.getOutput(input).get(0), 0.1);
+    }
+
+    // write model into file and read out
+    String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocal";
+    ann.setModelPath(modelPath);
+    try {
+      ann.writeModelToFile();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
+    // test on instances
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
+      // the expected output is the last element in array
+      double result = instances[i][2];
+      assertEquals(result, annCopy.getOutput(input).get(0), 0.1);
+    }
+  }
+
+  @Test
+  public void testXORWithMomentum() {
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    ann.setLearningRate(0.6);
+    ann.setMomemtumWeight(0.3);
+
+    int iterations = 2000; // with momentum, fewer iterations are needed to converge
+    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
+    for (int i = 0; i < iterations; ++i) {
+      for (int j = 0; j < instances.length; ++j) {
+        ann.trainOnline(new DenseDoubleVector(instances[j]));
+      }
+    }
+
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
+      // the expected output is the last element in array
+      double result = instances[i][2];
+      assertEquals(result, ann.getOutput(input).get(0), 0.1);
+    }
+
+    // write model into file and read out
+    String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocalWithMomentum";
+    ann.setModelPath(modelPath);
+    try {
+      ann.writeModelToFile();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
+    // test on instances
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
+      // the expected output is the last element in array
+      double result = instances[i][2];
+      assertEquals(result, annCopy.getOutput(input).get(0), 0.1);
+    }
+  }
+
+  @Test
+  public void testXORLocalWithRegularization() {
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("SquaredError"));
+    ann.setLearningRate(0.7);
+    ann.setMomemtumWeight(0.5);
+    ann.setRegularizationWeight(0.002);
+
+    int iterations = 5000; // the iteration count must be large enough for the network to converge
+    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
+    for (int i = 0; i < iterations; ++i) {
+      for (int j = 0; j < instances.length; ++j) {
+        ann.trainOnline(new DenseDoubleVector(instances[j]));
+      }
+    }
+
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
+      // the expected output is the last element in array
+      double result = instances[i][2];
+      assertEquals(result, ann.getOutput(input).get(0), 0.05);
+    }
+
+    // write model into file and read out
+    String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocalWithRegularization";
+    ann.setModelPath(modelPath);
+    try {
+      ann.writeModelToFile();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
+    // test on instances
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
+      // the expected output is the last element in array
+      double result = instances[i][2];
+      assertEquals(result, annCopy.getOutput(input).get(0), 0.05);
+    }
+  }
+
+  @Test
+  public void testTwoClassClassification() {
+    // use logistic regression data
+    String filepath = "src/test/resources/logistic_regression_data.txt";
+    List<double[]> instanceList = new ArrayList<double[]>();
+
+    try {
+      BufferedReader br = new BufferedReader(new FileReader(filepath));
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        String[] tokens = line.trim().split(",");
+        double[] instance = new double[tokens.length];
+        for (int i = 0; i < tokens.length; ++i) {
+          instance[i] = Double.parseDouble(tokens[i]);
+        }
+        instanceList.add(instance);
+      }
+      br.close();
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+    int dimension = instanceList.get(0).length - 1;
+
+    // min-max normalization
+    double[] mins = new double[dimension];
+    double[] maxs = new double[dimension];
+    Arrays.fill(mins, Double.MAX_VALUE);
+    // Double.MIN_VALUE is the smallest positive double, so use -Double.MAX_VALUE
+    Arrays.fill(maxs, -Double.MAX_VALUE);
+
+    for (double[] instance : instanceList) {
+      for (int i = 0; i < instance.length - 1; ++i) {
+        if (mins[i] > instance[i]) {
+          mins[i] = instance[i];
+        }
+        if (maxs[i] < instance[i]) {
+          maxs[i] = instance[i];
+        }
+      }
+    }
+
+    for (double[] instance : instanceList) {
+      for (int i = 0; i < instance.length - 1; ++i) {
+        double range = maxs[i] - mins[i];
+        if (range != 0) {
+          instance[i] = (instance[i] - mins[i]) / range;
+        }
+      }
+    }
+
+    // divide dataset into training and testing
+    List<double[]> testInstances = new ArrayList<double[]>();
+    testInstances.addAll(instanceList.subList(instanceList.size() - 100,
+        instanceList.size()));
+    List<double[]> trainingInstances = instanceList.subList(0,
+        instanceList.size() - 100);
+
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.setLearningRate(0.001);
+    ann.setMomemtumWeight(0.1);
+    ann.setRegularizationWeight(0.01);
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("CrossEntropy"));
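+    // cross-entropy is the natural cost function for sigmoid outputs in
+    // binary classification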
+
+    long start = new Date().getTime();
+    int iterations = 1000;
+    for (int i = 0; i < iterations; ++i) {
+      for (double[] trainingInstance : trainingInstances) {
+        ann.trainOnline(new DenseDoubleVector(trainingInstance));
+      }
+    }
+    long end = new Date().getTime();
+    Log.info(String.format("Training time: %fs\n",
+        (double) (end - start) / 1000));
+
+    double errorRate = 0;
+    // calculate the error on test instance
+    for (double[] testInstance : testInstances) {
+      DoubleVector instance = new DenseDoubleVector(testInstance);
+      double expected = instance.get(instance.getDimension() - 1);
+      instance = instance.slice(instance.getDimension() - 1);
+      double actual = ann.getOutput(instance).get(0);
+      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
+        ++errorRate;
+      }
+    }
+    errorRate /= testInstances.size();
+
+    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
+  }
+
+  @Test
+  public void testDistributedVersion() {
+    // write data into a sequence file
+    String tmpStrDatasetPath = "/tmp/logistic_regression_data";
+    Path tmpDatasetPath = new Path(tmpStrDatasetPath);
+    String strDataPath = "src/test/resources/logistic_regression_data.txt";
+    String modelPath = "/tmp/distributed-model";
+
+    Configuration conf = new Configuration();
+    List<double[]> instanceList = new ArrayList<double[]>();
+    List<double[]> trainingInstances = null;
+    List<double[]> testInstances = null;
+
+    try {
+      FileSystem fs = FileSystem.get(new URI(tmpStrDatasetPath), conf);
+      // remove any stale copy; the SequenceFile writer below recreates the file
+      fs.delete(tmpDatasetPath, true);
+
+      BufferedReader br = new BufferedReader(new FileReader(strDataPath));
+      String line = null;
+      int count = 0;
+      while ((line = br.readLine()) != null) {
+        String[] tokens = line.trim().split(",");
+        double[] instance = new double[tokens.length];
+        for (int i = 0; i < tokens.length; ++i) {
+          instance[i] = Double.parseDouble(tokens[i]);
+        }
+        instanceList.add(instance);
+      }
+      br.close();
+
+      int dimension = instanceList.get(0).length - 1;
+      // min-max normalization
+      double[] mins = new double[dimension];
+      double[] maxs = new double[dimension];
+      Arrays.fill(mins, Double.MAX_VALUE);
+      // Double.MIN_VALUE is the smallest positive double, so use -Double.MAX_VALUE
+      Arrays.fill(maxs, -Double.MAX_VALUE);
+
+      for (double[] instance : instanceList) {
+        for (int i = 0; i < instance.length - 1; ++i) {
+          mins[i] = Math.min(mins[i], instance[i]);
+          maxs[i] = Math.max(maxs[i], instance[i]);
+        }
+      }
+
+      for (double[] instance : instanceList) {
+        for (int i = 0; i < instance.length - 1; ++i) {
+          double range = maxs[i] - mins[i];
+          if (range != 0) {
+            instance[i] = (instance[i] - mins[i]) / range;
+          }
+        }
+      }
+
+      // write training data to a temporary sequence file
+      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
+          tmpDatasetPath, LongWritable.class, VectorWritable.class);
+      int testSize = 150;
+
+      Collections.shuffle(instanceList);
+      testInstances = new ArrayList<double[]>();
+      testInstances.addAll(instanceList.subList(instanceList.size() - testSize,
+          instanceList.size()));
+      trainingInstances = instanceList.subList(0, instanceList.size()
+          - testSize);
+
+      for (double[] instance : trainingInstances) {
+        DoubleVector vec = new DenseDoubleVector(instance);
+        writer.append(new LongWritable(count++), new VectorWritable(vec));
+      }
+      writer.close();
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (URISyntaxException e) {
+      e.printStackTrace();
+    }
+
+    // create model
+    int dimension = 8;
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.setLearningRate(0.7);
+    ann.setMomemtumWeight(0.5);
+    ann.setRegularizationWeight(0.1);
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("CrossEntropy"));
+    ann.setModelPath(modelPath);
+
+    long start = new Date().getTime();
+    Map<String, String> trainingParameters = new HashMap<String, String>();
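+    // as the key names suggest: the number of parallel BSP tasks, the cap on
+    // training iterations, the mini-batch size, and how often (in iterations)
+    // convergence is checked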
+    trainingParameters.put("tasks", "5");
+    trainingParameters.put("training.max.iterations", "2000");
+    trainingParameters.put("training.batch.size", "300");
+    trainingParameters.put("convergence.check.interval", "1000");
+    ann.train(tmpDatasetPath, trainingParameters);
+
+    long end = new Date().getTime();
+
+    // validate results
+    double errorRate = 0;
+    // calculate the error on test instance
+    for (double[] testInstance : testInstances) {
+      DoubleVector instance = new DenseDoubleVector(testInstance);
+      double expected = instance.get(instance.getDimension() - 1);
+      instance = instance.slice(instance.getDimension() - 1);
+      double actual = ann.getOutput(instance).get(0);
+      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
+        ++errorRate;
+      }
+    }
+    errorRate /= testInstances.size();
+
+    Log.info(String.format("Training time: %fs\n",
+        (double) (end - start) / 1000));
+    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
+  }
+
+}

Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java?rev=1513873&view=auto
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java (added)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java Wed Aug 14 13:27:18 2013
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.ml.ann;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hama.ml.math.DenseDoubleMatrix;
+import org.apache.hama.ml.math.DoubleMatrix;
+import org.junit.Test;
+
+/**
+ * Test the functionalities of SmallLayeredNeuralNetworkMessage.
+ * 
+ */
+public class TestSmallLayeredNeuralNetworkMessage {
+
+  @Test
+  public void testReadWriteWithoutPrev() {
+    double error = 0.22;
+    double[][] matrix1 = new double[][] { { 0.1, 0.2, 0.8, 0.5 },
+        { 0.3, 0.4, 0.6, 0.2 }, { 0.5, 0.6, 0.1, 0.5 } };
+    double[][] matrix2 = new double[][] { { 0.8, 1.2, 0.5 } };
+    DoubleMatrix[] matrices = new DoubleMatrix[2];
+    matrices[0] = new DenseDoubleMatrix(matrix1);
+    matrices[1] = new DenseDoubleMatrix(matrix2);
+
+    boolean isConverge = false;
+
+    SmallLayeredNeuralNetworkMessage message = new SmallLayeredNeuralNetworkMessage(
+        error, isConverge, matrices, null);
+    Configuration conf = new Configuration();
+    String strPath = "/tmp/testReadWriteSmallLayeredNeuralNetworkMessage";
+    Path path = new Path(strPath);
+    try {
+      FileSystem fs = FileSystem.get(new URI(strPath), conf);
+      FSDataOutputStream out = fs.create(path);
+      message.write(out);
+      out.close();
+
+      FSDataInputStream in = fs.open(path);
+      SmallLayeredNeuralNetworkMessage readMessage = new SmallLayeredNeuralNetworkMessage(
+          0, isConverge, null, null);
+      readMessage.readFields(in);
+      in.close();
+      assertEquals(error, readMessage.getTrainingError(), 0.000001);
+      assertFalse(readMessage.isConverge());
+      DoubleMatrix[] readMatrices = readMessage.getCurMatrices();
+      assertEquals(2, readMatrices.length);
+      for (int i = 0; i < readMatrices.length; ++i) {
+        double[][] doubleMatrices = ((DenseDoubleMatrix) readMatrices[i])
+            .getValues();
+        double[][] doubleExpected = ((DenseDoubleMatrix) matrices[i])
+            .getValues();
+        for (int r = 0; r < doubleMatrices.length; ++r) {
+          assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001);
+        }
+      }
+
+      DoubleMatrix[] readPrevMatrices = readMessage.getPrevMatrices();
+      assertNull(readPrevMatrices);
+
+      // delete
+      fs.delete(path, true);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (URISyntaxException e) {
+      e.printStackTrace();
+    }
+  }
+
+  @Test
+  public void testReadWriteWithPrev() {
+    double error = 0.22;
+    boolean isConverge = true;
+
+    double[][] matrix1 = new double[][] { { 0.1, 0.2, 0.8, 0.5 },
+        { 0.3, 0.4, 0.6, 0.2 }, { 0.5, 0.6, 0.1, 0.5 } };
+    double[][] matrix2 = new double[][] { { 0.8, 1.2, 0.5 } };
+    DoubleMatrix[] matrices = new DoubleMatrix[2];
+    matrices[0] = new DenseDoubleMatrix(matrix1);
+    matrices[1] = new DenseDoubleMatrix(matrix2);
+
+    double[][] prevMatrix1 = new double[][] { { 0.1, 0.1, 0.2, 0.3 },
+        { 0.2, 0.4, 0.1, 0.5 }, { 0.5, 0.1, 0.5, 0.2 } };
+    double[][] prevMatrix2 = new double[][] { { 0.1, 0.2, 0.5, 0.9 },
+        { 0.3, 0.5, 0.2, 0.6 }, { 0.6, 0.8, 0.7, 0.5 } };
+
+    DoubleMatrix[] prevMatrices = new DoubleMatrix[2];
+    prevMatrices[0] = new DenseDoubleMatrix(prevMatrix1);
+    prevMatrices[1] = new DenseDoubleMatrix(prevMatrix2);
+
+    SmallLayeredNeuralNetworkMessage message = new SmallLayeredNeuralNetworkMessage(
+        error, isConverge, matrices, prevMatrices);
+    Configuration conf = new Configuration();
+    String strPath = "/tmp/testReadWriteSmallLayeredNeuralNetworkMessageWithPrev";
+    Path path = new Path(strPath);
+    try {
+      FileSystem fs = FileSystem.get(new URI(strPath), conf);
+      FSDataOutputStream out = fs.create(path);
+      message.write(out);
+      out.close();
+
+      FSDataInputStream in = fs.open(path);
+      SmallLayeredNeuralNetworkMessage readMessage = new SmallLayeredNeuralNetworkMessage(
+          0, isConverge, null, null);
+      readMessage.readFields(in);
+      in.close();
+
+      assertTrue(readMessage.isConverge());
+
+      DoubleMatrix[] readMatrices = readMessage.getCurMatrices();
+      assertEquals(2, readMatrices.length);
+      for (int i = 0; i < readMatrices.length; ++i) {
+        double[][] doubleMatrices = ((DenseDoubleMatrix) readMatrices[i])
+            .getValues();
+        double[][] doubleExpected = ((DenseDoubleMatrix) matrices[i])
+            .getValues();
+        for (int r = 0; r < doubleMatrices.length; ++r) {
+          assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001);
+        }
+      }
+
+      DoubleMatrix[] readPrevMatrices = readMessage.getPrevMatrices();
+      assertEquals(2, readPrevMatrices.length);
+      for (int i = 0; i < readPrevMatrices.length; ++i) {
+        double[][] doubleMatrices = ((DenseDoubleMatrix) readPrevMatrices[i])
+            .getValues();
+        double[][] doubleExpected = ((DenseDoubleMatrix) prevMatrices[i])
+            .getValues();
+        for (int r = 0; r < doubleMatrices.length; ++r) {
+          assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001);
+        }
+      }
+
+      // delete
+      fs.delete(path, true);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (URISyntaxException e) {
+      e.printStackTrace();
+    }
+  }
+
+}

Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java?rev=1513873&r1=1513872&r2=1513873&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java Wed Aug 14 13:27:18 2013
@@ -40,7 +40,6 @@ import org.apache.hama.ml.math.DoubleMat
 import org.apache.hama.ml.math.DoubleVector;
 import org.apache.hama.ml.writable.MatrixWritable;
 import org.apache.hama.ml.writable.VectorWritable;
-import org.junit.Ignore;
 import org.junit.Test;
 
 public class TestSmallMultiLayerPerceptron {
@@ -305,10 +304,9 @@ public class TestSmallMultiLayerPerceptr
       e.printStackTrace();
     }
   }
-  
+
   /**
-   * Test training with momentum.
-   * The MLP can converge faster.
+   * Test training with momentum. The MLP can converge faster.
    */
   @Test
   public void testWithMomentum() {
@@ -359,7 +357,6 @@ public class TestSmallMultiLayerPerceptr
    * Test the XOR problem.
    */
   @Test
-  @Ignore
   public void testTrainingByXOR() {
     // write in some training instances
     Configuration conf = new Configuration();

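The momentum training exercised above (testWithMomentum) folds a fraction of the previous weight update into the current one, which damps oscillation and lets the MLP converge in fewer iterations. A minimal sketch of that update rule, using hypothetical names rather than Hama's internal API:

    // delta_t = learningRate * gradient_t + momentumWeight * delta_{t-1}; w -= delta_t
    class MomentumUpdate {
      private final double learningRate;
      private final double momentumWeight;
      private final double[] previousDelta; // starts at all zeros

      MomentumUpdate(double learningRate, double momentumWeight, int dimension) {
        this.learningRate = learningRate;
        this.momentumWeight = momentumWeight;
        this.previousDelta = new double[dimension];
      }

      void apply(double[] weights, double[] gradient) {
        for (int i = 0; i < weights.length; ++i) {
          double delta = learningRate * gradient[i]
              + momentumWeight * previousDelta[i];
          weights[i] -= delta;      // step against the (momentum-smoothed) gradient
          previousDelta[i] = delta; // remembered for the next update
        }
      }
    }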
Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java?rev=1513873&view=auto
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java (added)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLinearRegression.java Wed Aug 14 13:27:18 2013
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hama.ml.regression;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hama.ml.math.DenseDoubleVector;
+import org.apache.hama.ml.math.DoubleVector;
+import org.junit.Test;
+import org.mortbay.log.Log;
+
+/**
+ * Test the functionalities of the linear regression model.
+ * 
+ */
+public class TestLinearRegression {
+
+  @Test
+  public void testLinearRegressionSimple() {
+    // y = 2.1 * x_1 + 0.7 * x_2 + 0.1 * x_3
+    double[][] instances = { { 1, 1, 1, 2.9 }, { 5, 2, 3, 12.2 },
+        { 2, 5, 8, 8.5 }, { 0.5, 0.1, 0.2, 1.14 }, { 10, 20, 30, 38 },
+        { 0.6, 20, 5, 15.76 } };
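+    // e.g. the first instance: 2.1 * 1 + 0.7 * 1 + 0.1 * 1 = 2.9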
+
+    LinearRegression regression = new LinearRegression(instances[0].length - 1);
+    regression.setLearningRate(0.001);
+    regression.setMomemtumWeight(0.1);
+
+    int iterations = 100;
+    for (int i = 0; i < iterations; ++i) {
+      for (int j = 0; j < instances.length; ++j) {
+        regression.trainOnline(new DenseDoubleVector(instances[j]));
+      }
+    }
+
+    double relativeError = 0;
+    for (int i = 0; i < instances.length; ++i) {
+      DoubleVector test = new DenseDoubleVector(instances[i]);
+      double expected = test.get(test.getDimension() - 1);
+      test = test.slice(test.getDimension() - 1);
+      double actual = regression.getOutput(test).get(0);
+      relativeError += Math.abs((expected - actual) / expected);
+    }
+
+    relativeError /= instances.length;
+    Log.info(String.format("Relative error: %f%%\n", relativeError * 100));
+  }
+
+  @Test
+  public void testLinearRegressionOnlineTraining() {
+    // read linear regression data
+    String filepath = "src/test/resources/linear_regression_data.txt";
+    List<double[]> instanceList = new ArrayList<double[]>();
+
+    try {
+      BufferedReader br = new BufferedReader(new FileReader(filepath));
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("#")) { // ignore comments
+          continue;
+        }
+        String[] tokens = line.trim().split(" ");
+        double[] instance = new double[tokens.length];
+        for (int i = 0; i < tokens.length; ++i) {
+          instance[i] = Double.parseDouble(tokens[i]);
+        }
+        instanceList.add(instance);
+      }
+      br.close();
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    // divide dataset into training and testing
+    List<double[]> testInstances = new ArrayList<double[]>();
+    testInstances.addAll(instanceList.subList(instanceList.size() - 20,
+        instanceList.size()));
+    List<double[]> trainingInstances = instanceList.subList(0,
+        instanceList.size() - 20);
+
+    int dimension = instanceList.get(0).length - 1;
+
+    LinearRegression regression = new LinearRegression(dimension);
+    regression.setLearningRate(0.00000005);
+    regression.setMomemtumWeight(0.1);
+    regression.setRegularizationWeight(0.05);
+    int iterations = 2000;
+    for (int i = 0; i < iterations; ++i) {
+      for (double[] trainingInstance : trainingInstances) {
+        regression.trainOnline(new DenseDoubleVector(trainingInstance));
+      }
+    }
+
+    double relativeError = 0.0;
+    // calculate the error on test instance
+    for (double[] testInstance : testInstances) {
+      DoubleVector instance = new DenseDoubleVector(testInstance);
+      double expected = instance.get(instance.getDimension() - 1);
+      instance = instance.slice(instance.getDimension() - 1);
+      double actual = regression.getOutput(instance).get(0);
+      if (expected == 0) {
+        expected = 0.0000001;
+      }
+      relativeError += Math.abs((expected - actual) / expected);
+    }
+    relativeError /= testInstances.size();
+
+    Log.info(String.format("Relative error: %f%%\n", relativeError * 100));
+  }
+
+}

Added: hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java?rev=1513873&view=auto
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java (added)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/regression/TestLogisticRegression.java Wed Aug 14 13:27:18 2013
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hama.ml.regression;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hama.ml.math.DenseDoubleVector;
+import org.apache.hama.ml.math.DoubleVector;
+import org.junit.Test;
+import org.mortbay.log.Log;
+
+/**
+ * Test the functionalities of LogisticRegression.
+ * 
+ */
+public class TestLogisticRegression {
+
+  @Test
+  public void testLogisticRegressionLocal() {
+    // read logistic regression data
+    String filepath = "src/test/resources/logistic_regression_data.txt";
+    List<double[]> instanceList = new ArrayList<double[]>();
+
+    try {
+      BufferedReader br = new BufferedReader(new FileReader(filepath));
+      String line = null;
+      while ((line = br.readLine()) != null) {
+        if (line.startsWith("#")) { // ignore comments
+          continue;
+        }
+        String[] tokens = line.trim().split(",");
+        double[] instance = new double[tokens.length];
+        for (int i = 0; i < tokens.length; ++i) {
+          instance[i] = Double.parseDouble(tokens[i]);
+        }
+        instanceList.add(instance);
+      }
+      br.close();
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+    int dimension = instanceList.get(0).length - 1;
+
+    // min-max normalization
+    double[] mins = new double[dimension];
+    double[] maxs = new double[dimension];
+    Arrays.fill(mins, Double.MAX_VALUE);
+    // Double.MIN_VALUE is the smallest positive double, so use -Double.MAX_VALUE
+    Arrays.fill(maxs, -Double.MAX_VALUE);
+
+    for (double[] instance : instanceList) {
+      for (int i = 0; i < instance.length - 1; ++i) {
+        if (mins[i] > instance[i]) {
+          mins[i] = instance[i];
+        }
+        if (maxs[i] < instance[i]) {
+          maxs[i] = instance[i];
+        }
+      }
+    }
+
+    for (double[] instance : instanceList) {
+      for (int i = 0; i < instance.length - 1; ++i) {
+        double range = maxs[i] - mins[i];
+        if (range != 0) {
+          instance[i] = (instance[i] - mins[i]) / range;
+        }
+      }
+    }
+
+    // divide dataset into training and testing
+    List<double[]> testInstances = new ArrayList<double[]>();
+    testInstances.addAll(instanceList.subList(instanceList.size() - 100,
+        instanceList.size()));
+    List<double[]> trainingInstances = instanceList.subList(0,
+        instanceList.size() - 100);
+
+    LogisticRegression regression = new LogisticRegression(dimension);
+    regression.setLearningRate(0.2);
+    regression.setMomemtumWeight(0.1);
+    regression.setRegularizationWeight(0.1);
+    int iterations = 1000;
+    for (int i = 0; i < iterations; ++i) {
+      for (double[] trainingInstance : trainingInstances) {
+        regression.trainOnline(new DenseDoubleVector(trainingInstance));
+      }
+    }
+
+    double errorRate = 0;
+    // calculate the error on test instance
+    for (double[] testInstance : testInstances) {
+      DoubleVector instance = new DenseDoubleVector(testInstance);
+      double expected = instance.get(instance.getDimension() - 1);
+      DoubleVector features = instance.slice(instance.getDimension() - 1);
+      double actual = regression.getOutput(features).get(0);
+      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
+        ++errorRate;
+      }
+    }
+    errorRate /= testInstances.size();
+
+    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
+  }
+
+}


