hama-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From edwardy...@apache.org
Subject [2/5] hama git commit: HAMA-961: Remove ann package
Date Mon, 23 Nov 2015 02:26:48 GMT
http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/main/java/org/apache/hama/ml/perception/SmallMLPTrainer.java
----------------------------------------------------------------------
diff --git a/ml/src/main/java/org/apache/hama/ml/perception/SmallMLPTrainer.java b/ml/src/main/java/org/apache/hama/ml/perception/SmallMLPTrainer.java
deleted file mode 100644
index 8b08136..0000000
--- a/ml/src/main/java/org/apache/hama/ml/perception/SmallMLPTrainer.java
+++ /dev/null
@@ -1,327 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hama.ml.perception;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.BitSet;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hama.bsp.BSPPeer;
-import org.apache.hama.bsp.sync.SyncException;
-import org.apache.hama.commons.io.VectorWritable;
-import org.apache.hama.commons.math.DenseDoubleMatrix;
-import org.apache.hama.ml.ann.NeuralNetworkTrainer;
-
-/**
- * The perceptron trainer for small scale MLP.
- */
-class SmallMLPTrainer extends NeuralNetworkTrainer {
-
-  /* used by master only, check whether all slaves have finished reading */
-  private BitSet statusSet;
-
-  private int numTrainingInstanceRead = 0;
-  /* Once reader reaches the EOF, the training procedure would be terminated */
-  private boolean terminateTraining = false;
-
-  private SmallMultiLayerPerceptron inMemoryPerceptron;
-
-  private int[] layerSizeArray;
-
-  @Override
-  protected void extraSetup(
-      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer) {
-
-    // obtain parameters
-    this.trainingMode = conf.get("training.mode", "minibatch.gradient.descent");
-    // mini-batch by default
-    this.batchSize = conf.getInt("training.batch.size", 100);
-
-    this.statusSet = new BitSet(peer.getConfiguration().getInt("tasks", 1));
-
-    String outputModelPath = conf.get("modelPath");
-    if (outputModelPath == null || outputModelPath.trim().length() == 0) {
-      try {
-        throw new Exception("Please specify output model path.");
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
-    }
-
-    String modelPath = conf.get("existingModelPath");
-    // build model from scratch
-    if (modelPath == null || modelPath.trim().length() == 0) {
-      double learningRate = Double.parseDouble(conf.get("learningRate"));
-      double regularization = Double.parseDouble(conf.get("regularization"));
-      double momentum = Double.parseDouble(conf.get("momentum"));
-      String squashingFunctionName = conf.get("squashingFunctionName");
-      String costFunctionName = conf.get("costFunctionName");
-      String[] layerSizeArrayStr = conf.get("layerSizeArray").trim().split(" ");
-      this.layerSizeArray = new int[layerSizeArrayStr.length];
-      for (int i = 0; i < this.layerSizeArray.length; ++i) {
-        this.layerSizeArray[i] = Integer.parseInt(layerSizeArrayStr[i]);
-      }
-
-      this.inMemoryPerceptron = new SmallMultiLayerPerceptron(learningRate,
-          regularization, momentum, squashingFunctionName, costFunctionName,
-          layerSizeArray);
-      LOG.info("Training model from scratch.");
-    } else { // read model from existing data
-      this.inMemoryPerceptron = new SmallMultiLayerPerceptron(modelPath);
-      LOG.info("Training with existing model.");
-    }
-
-  }
-
-  @Override
-  protected void extraCleanup(
-      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer) {
-    LOG.info(String.format("Task %d totally read %d records.\n",
-        peer.getPeerIndex(), this.numTrainingInstanceRead));
-    // master write learned model to disk
-    if (peer.getPeerIndex() == 0) {
-      try {
-        LOG.info(String.format("Master write learned model to %s\n",
-            conf.get("modelPath")));
-        this.inMemoryPerceptron.writeModelToFile(conf.get("modelPath"));
-      } catch (IOException e) {
-        System.err.println("Please set a correct model path.");
-      }
-    }
-  }
-
-  @Override
-  public void bsp(
-      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer)
-      throws IOException, SyncException, InterruptedException {
-    LOG.info("Start training...");
-    if (trainingMode.equalsIgnoreCase("minibatch.gradient.descent")) {
-      LOG.info("Training Mode: minibatch.gradient.descent");
-      trainByMinibatch(peer);
-    }
-
-    LOG.info(String.format("Task %d finished.", peer.getPeerIndex()));
-  }
-
-  /**
-   * Train the MLP with mini-batch gradient descent.
-   * 
-   * @param peer
-   * @throws IOException
-   * @throws SyncException
-   * @throws InterruptedException
-   */
-  private void trainByMinibatch(
-      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer)
-      throws IOException, SyncException, InterruptedException {
-
-    int maxIteration = conf.getInt("training.iteration", 1);
-    LOG.info("# of Training Iteration: " + maxIteration);
-
-    for (int i = 0; i < maxIteration; ++i) {
-      if (peer.getPeerIndex() == 0) {
-        LOG.info(String.format("Iteration [%d] begins...", i));
-      }
-      peer.reopenInput();
-      // reset status
-      if (peer.getPeerIndex() == 0) {
-        this.statusSet = new BitSet(peer.getConfiguration().getInt("tasks", 1));
-      }
-      this.terminateTraining = false;
-      peer.sync();
-      while (true) {
-        // each slave task updates weights according to training data
-        boolean terminate = updateWeights(peer);
-        peer.sync();
-
-        // master merges the updates
-        if (peer.getPeerIndex() == 0) {
-          mergeUpdate(peer);
-        }
-        peer.sync();
-
-        if (terminate) {
-          break;
-        }
-      }
-
-    }
-
-  }
-
-  /**
-   * Merge the updates from the slave tasks.
-   * 
-   * @param peer
-   * @throws IOException
-   */
-  private void mergeUpdate(
-      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer)
-      throws IOException {
-    // initialize the cache
-    DenseDoubleMatrix[] mergedUpdates = this.getZeroWeightMatrices();
-
-    int numOfPartitions = peer.getNumCurrentMessages();
-
-    // aggregates the weights update
-    while (peer.getNumCurrentMessages() > 0) {
-      SmallMLPMessage message = (SmallMLPMessage) peer.getCurrentMessage();
-      if (message.isTerminated()) {
-        this.statusSet.set(message.getOwner());
-      }
-
-      DenseDoubleMatrix[] weightUpdates = message.getWeightUpdatedMatrices();
-      for (int m = 0; m < mergedUpdates.length; ++m) {
-        mergedUpdates[m] = (DenseDoubleMatrix) mergedUpdates[m]
-            .add(weightUpdates[m]);
-      }
-    }
-
-    if (numOfPartitions != 0) {
-      // calculate the global mean (the mean of batches from all slave tasks) of
-      // the weight updates
-      for (int m = 0; m < mergedUpdates.length; ++m) {
-        mergedUpdates[m] = (DenseDoubleMatrix) mergedUpdates[m]
-            .divide(numOfPartitions);
-      }
-
-      // check if all tasks have finished reading data
-      if (this.statusSet.cardinality() == conf.getInt("tasks", 1)) {
-        this.terminateTraining = true;
-      }
-
-      // update the weight matrices
-      this.inMemoryPerceptron.updateWeightMatrices(mergedUpdates);
-      this.inMemoryPerceptron.setPrevWeightUpdateMatrices(mergedUpdates);
-    }
-
-    // broadcast updated weight matrices
-    for (String peerName : peer.getAllPeerNames()) {
-      SmallMLPMessage msg = new SmallMLPMessage(peer.getPeerIndex(),
-          this.terminateTraining, this.inMemoryPerceptron.getWeightMatrices(),
-          this.inMemoryPerceptron.getPrevWeightUpdateMatrices());
-      peer.send(peerName, msg);
-    }
-
-  }
-
-  /**
-   * Train the MLP with training data.
-   * 
-   * @param peer
-   * @return Whether terminates.
-   * @throws IOException
-   */
-  private boolean updateWeights(
-      BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer)
-      throws IOException {
-    // receive update message sent by master
-    if (peer.getNumCurrentMessages() > 0) {
-      SmallMLPMessage message = (SmallMLPMessage) peer.getCurrentMessage();
-      this.terminateTraining = message.isTerminated();
-      // each slave renew its weight matrices
-      this.inMemoryPerceptron.setWeightMatrices(message
-          .getWeightUpdatedMatrices());
-      this.inMemoryPerceptron.setPrevWeightUpdateMatrices(message
-          .getPrevWeightsUpdatedMatrices());
-      if (this.terminateTraining) {
-        return true;
-      }
-    }
-
-    // update weight according to training data
-    DenseDoubleMatrix[] weightUpdates = this.getZeroWeightMatrices();
-
-    int count = 0;
-    LongWritable recordId = new LongWritable();
-    VectorWritable trainingInstance = new VectorWritable();
-    boolean hasMore = false;
-    while (count++ < this.batchSize) {
-      hasMore = peer.readNext(recordId, trainingInstance);
-
-      try {
-        DenseDoubleMatrix[] singleTrainingInstanceUpdates = this.inMemoryPerceptron
-            .trainByInstance(trainingInstance.getVector());
-        // aggregate the updates
-        for (int m = 0; m < weightUpdates.length; ++m) {
-          weightUpdates[m] = (DenseDoubleMatrix) weightUpdates[m]
-              .add(singleTrainingInstanceUpdates[m]);
-        }
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
-
-      ++numTrainingInstanceRead;
-      if (!hasMore) {
-        break;
-      }
-    }
-
-    // calculate the local mean (the mean of the local batch) of weight updates
-    for (int m = 0; m < weightUpdates.length; ++m) {
-      weightUpdates[m] = (DenseDoubleMatrix) weightUpdates[m].divide(count);
-    }
-
-    LOG.info(String.format("Task %d has read %d records.", peer.getPeerIndex(),
-        this.numTrainingInstanceRead));
-
-    // send the weight updates to master task
-    SmallMLPMessage message = new SmallMLPMessage(peer.getPeerIndex(),
-        !hasMore, weightUpdates);
-    peer.send(peer.getPeerName(0), message); // send status to master
-
-    return !hasMore;
-  }
-
-  /**
-   * Initialize the weight matrices.
-   */
-  private DenseDoubleMatrix[] getZeroWeightMatrices() {
-    DenseDoubleMatrix[] weightUpdateCache = new DenseDoubleMatrix[this.layerSizeArray.length - 1];
-    // initialize weight matrix each layer
-    for (int i = 0; i < weightUpdateCache.length; ++i) {
-      weightUpdateCache[i] = new DenseDoubleMatrix(this.layerSizeArray[i] + 1,
-          this.layerSizeArray[i + 1]);
-    }
-    return weightUpdateCache;
-  }
-
-  /**
-   * Print out the weights.
-   * 
-   * @param mat
-   * @return
-   */
-  protected static String weightsToString(DenseDoubleMatrix[] mat) {
-    StringBuilder sb = new StringBuilder();
-
-    for (int i = 0; i < mat.length; ++i) {
-      sb.append(String.format("Matrix [%d]\n", i));
-      double[][] values = mat[i].getValues();
-      for (double[] value : values) {
-        sb.append(Arrays.toString(value));
-        sb.append('\n');
-      }
-      sb.append('\n');
-    }
-    return sb.toString();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java
----------------------------------------------------------------------
diff --git a/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java b/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java
deleted file mode 100644
index 1b6d200..0000000
--- a/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java
+++ /dev/null
@@ -1,574 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hama.ml.perception;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.commons.lang.SerializationUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableUtils;
-import org.apache.hama.HamaConfiguration;
-import org.apache.hama.bsp.BSPJob;
-import org.apache.hama.commons.io.MatrixWritable;
-import org.apache.hama.commons.io.VectorWritable;
-import org.apache.hama.commons.math.DenseDoubleMatrix;
-import org.apache.hama.commons.math.DenseDoubleVector;
-import org.apache.hama.commons.math.DoubleFunction;
-import org.apache.hama.commons.math.DoubleVector;
-import org.apache.hama.commons.math.FunctionFactory;
-import org.apache.hama.ml.util.FeatureTransformer;
-import org.mortbay.log.Log;
-
-/**
- * SmallMultiLayerPerceptronBSP is a kind of multilayer perceptron whose
- * parameters can be fit into the memory of a single machine. This kind of model
- * can be trained and used more efficiently than the BigMultiLayerPerceptronBSP,
- * whose parameters are distributedly stored in multiple machines.
- * 
- * In general, it is a multilayer perceptron that consists of one input
- * layer, multiple hidden layers and one output layer.
- * 
- * The number of neurons in the input layer should be consistent with the number
- * of features in the training instance. The number of neurons in the output
- * layer should be consistent with the number of labels in the training instance.
- */
-public final class SmallMultiLayerPerceptron extends MultiLayerPerceptron
-    implements Writable {
-
-  /* The in-memory weight matrix */
-  private DenseDoubleMatrix[] weightMatrice;
-
-  /* Previous weight updates, used for momentum */
-  private DenseDoubleMatrix[] prevWeightUpdateMatrices;
-
-  /**
-   * @see MultiLayerPerceptron#MultiLayerPerceptron(double, double, double, String, String, int[])
-   */
-  public SmallMultiLayerPerceptron(double learningRate, double regularization,
-      double momentum, String squashingFunctionName, String costFunctionName,
-      int[] layerSizeArray) {
-    super(learningRate, regularization, momentum, squashingFunctionName,
-        costFunctionName, layerSizeArray);
-    initializeWeightMatrix();
-    this.initializePrevWeightUpdateMatrix();
-  }
-
-  /**
-   * @see MultiLayerPerceptron#MultiLayerPerceptron(String)
-   */
-  public SmallMultiLayerPerceptron(String modelPath) {
-    super(modelPath);
-    if (modelPath != null) {
-      try {
-        this.readFromModel();
-        this.initializePrevWeightUpdateMatrix();
-      } catch (IOException e) {
-        e.printStackTrace();
-      }
-    }
-  }
-
-  /**
-   * Initialize weight matrix using a uniform distribution. Each weight is
-   * initialized in range [-0.5, 0.5).
-   */
-  private void initializeWeightMatrix() {
-    this.weightMatrice = new DenseDoubleMatrix[this.numberOfLayers - 1];
-    // each layer contains one bias neuron
-    for (int i = 0; i < this.numberOfLayers - 1; ++i) {
-      // add weights for bias
-      this.weightMatrice[i] = new DenseDoubleMatrix(this.layerSizeArray[i] + 1,
-          this.layerSizeArray[i + 1]);
-
-      this.weightMatrice[i].applyToElements(new DoubleFunction() {
-
-        private final Random rnd = new Random();
-
-        @Override
-        public double apply(double value) {
-          return rnd.nextDouble() - 0.5;
-        }
-
-        @Override
-        public double applyDerivative(double value) {
-          throw new UnsupportedOperationException("Not supported");
-        }
-
-      });
-
-      // int rowCount = this.weightMatrice[i].getRowCount();
-      // int colCount = this.weightMatrice[i].getColumnCount();
-      // for (int row = 0; row < rowCount; ++row) {
-      // for (int col = 0; col < colCount; ++col) {
-      // this.weightMatrice[i].set(row, col, rnd.nextDouble() - 0.5);
-      // }
-      // }
-    }
-  }
-
-  /**
-   * Initialize the momentum weight matrices.
-   */
-  private void initializePrevWeightUpdateMatrix() {
-    this.prevWeightUpdateMatrices = new DenseDoubleMatrix[this.numberOfLayers - 1];
-    for (int i = 0; i < this.prevWeightUpdateMatrices.length; ++i) {
-      int row = this.layerSizeArray[i] + 1;
-      int col = this.layerSizeArray[i + 1];
-      this.prevWeightUpdateMatrices[i] = new DenseDoubleMatrix(row, col);
-    }
-  }
-
-  @Override
-  /**
-   * {@inheritDoc}
-   * The model meta-data is stored in memory.
-   */
-  public DoubleVector outputWrapper(DoubleVector featureVector) {
-    List<double[]> outputCache = this.outputInternal(featureVector);
-    // the output of the last layer is the output of the MLP
-    return new DenseDoubleVector(outputCache.get(outputCache.size() - 1));
-  }
-
-  private List<double[]> outputInternal(DoubleVector featureVector) {
-    // store the output of the hidden layers and output layer, each array store
-    // one layer
-    List<double[]> outputCache = new ArrayList<double[]>();
-
-    // start from the first hidden layer
-    double[] intermediateResults = new double[this.layerSizeArray[0] + 1];
-    if (intermediateResults.length - 1 != featureVector.getDimension()) {
-      throw new IllegalStateException(
-          "Input feature dimension incorrect! The dimension of input layer is "
-              + (this.layerSizeArray[0] - 1)
-              + ", but the dimension of input feature is "
-              + featureVector.getDimension());
-    }
-
-    // fill with input features
-    intermediateResults[0] = 1.0; // bias
-
-    // transform the original features to another space
-    featureVector = this.featureTransformer.transform(featureVector);
-
-    for (int i = 0; i < featureVector.getDimension(); ++i) {
-      intermediateResults[i + 1] = featureVector.get(i);
-    }
-    outputCache.add(intermediateResults);
-
-    // forward the intermediate results to next layer
-    for (int fromLayer = 0; fromLayer < this.numberOfLayers - 1; ++fromLayer) {
-      intermediateResults = forward(fromLayer, intermediateResults);
-      outputCache.add(intermediateResults);
-    }
-
-    return outputCache;
-  }
-
-  /**
-   * Calculate the intermediate results of layer fromLayer + 1.
-   * 
-   * @param fromLayer The index of layer that forwards the intermediate results
-   *          from.
-   * @return the value of intermediate results of layer.
-   */
-  private double[] forward(int fromLayer, double[] intermediateResult) {
-    int toLayer = fromLayer + 1;
-    double[] results = null;
-    int offset = 0;
-
-    if (toLayer < this.layerSizeArray.length - 1) { // add bias if it is not
-                                                    // output layer
-      results = new double[this.layerSizeArray[toLayer] + 1];
-      offset = 1;
-      results[0] = 1.0; // the bias
-    } else {
-      results = new double[this.layerSizeArray[toLayer]]; // no bias
-    }
-
-    for (int neuronIdx = 0; neuronIdx < this.layerSizeArray[toLayer]; ++neuronIdx) {
-      // aggregate the results from previous layer
-      for (int prevNeuronIdx = 0; prevNeuronIdx < this.layerSizeArray[fromLayer] + 1; ++prevNeuronIdx) {
-        results[neuronIdx + offset] += this.weightMatrice[fromLayer].get(
-            prevNeuronIdx, neuronIdx) * intermediateResult[prevNeuronIdx];
-      }
-      // calculate via squashing function
-      results[neuronIdx + offset] = this.squashingFunction
-          .apply(results[neuronIdx + offset]);
-    }
-
-    return results;
-  }
-
-  /**
-   * Get the updated weights using one training instance.
-   * 
-   * @param trainingInstance The trainingInstance is the concatenation of
-   *          feature vector and class label vector.
-   * @return The update of each weight.
-   * @throws Exception
-   */
-  DenseDoubleMatrix[] trainByInstance(DoubleVector trainingInstance)
-      throws Exception {
-    // initialize weight update matrices
-    DenseDoubleMatrix[] weightUpdateMatrices = new DenseDoubleMatrix[this.layerSizeArray.length - 1];
-    for (int m = 0; m < weightUpdateMatrices.length; ++m) {
-      weightUpdateMatrices[m] = new DenseDoubleMatrix(
-          this.layerSizeArray[m] + 1, this.layerSizeArray[m + 1]);
-    }
-
-    if (trainingInstance == null) {
-      return weightUpdateMatrices;
-    }
-
-    // transform the features (exclude the labels) to new space
-    double[] trainingVec = trainingInstance.toArray();
-    double[] trainingFeature = this.featureTransformer.transform(
-        trainingInstance.sliceUnsafe(0, this.layerSizeArray[0] - 1)).toArray();
-    double[] trainingLabels = Arrays.copyOfRange(trainingVec,
-        this.layerSizeArray[0], trainingVec.length);
-
-    DoubleVector trainingFeatureVec = new DenseDoubleVector(trainingFeature);
-    List<double[]> outputCache = this.outputInternal(trainingFeatureVec);
-
-    // calculate the delta of output layer
-    double[] delta = new double[this.layerSizeArray[this.layerSizeArray.length - 1]];
-    double[] outputLayerOutput = outputCache.get(outputCache.size() - 1);
-    double[] lastHiddenLayerOutput = outputCache.get(outputCache.size() - 2);
-
-    DenseDoubleMatrix prevWeightUpdateMatrix = this.prevWeightUpdateMatrices[this.prevWeightUpdateMatrices.length - 1];
-    for (int j = 0; j < delta.length; ++j) {
-      delta[j] = this.costFunction.applyDerivative(trainingLabels[j],
-          outputLayerOutput[j]);
-      // add regularization term
-      if (this.regularization != 0.0) {
-        double derivativeRegularization = 0.0;
-        DenseDoubleMatrix weightMatrix = this.weightMatrice[this.weightMatrice.length - 1];
-        for (int k = 0; k < this.layerSizeArray[this.layerSizeArray.length - 1]; ++k) {
-          derivativeRegularization += weightMatrix.get(k, j);
-        }
-        derivativeRegularization /= this.layerSizeArray[this.layerSizeArray.length - 1];
-        delta[j] += this.regularization * derivativeRegularization;
-      }
-
-      delta[j] *= this.squashingFunction.applyDerivative(outputLayerOutput[j]);
-
-      // calculate the weight update matrix between the last hidden layer and
-      // the output layer
-      for (int i = 0; i < this.layerSizeArray[this.layerSizeArray.length - 2] + 1; ++i) {
-        double updatedValue = -this.learningRate * delta[j]
-            * lastHiddenLayerOutput[i];
-        // add momentum
-        updatedValue += this.momentum * prevWeightUpdateMatrix.get(i, j);
-        weightUpdateMatrices[weightUpdateMatrices.length - 1].set(i, j,
-            updatedValue);
-      }
-    }
-
-    // calculate the delta for each hidden layer through back-propagation
-    for (int l = this.layerSizeArray.length - 2; l >= 1; --l) {
-      delta = backpropagate(l, delta, outputCache, weightUpdateMatrices);
-    }
-
-    return weightUpdateMatrices;
-  }
-
-  /**
-   * Back-propagate the errors from nextLayer to prevLayer. The weight updated
-   * information will be stored in the weightUpdateMatrices, and the delta of
-   * the prevLayer would be returned.
-   * 
-   * @param curLayerIdx The layer index of the current layer.
-   * @param nextLayerDelta The delta of the next layer.
-   * @param outputCache The cache of the output of all the layers.
-   * @param weightUpdateMatrices The weight update matrices.
-   * @return The delta of the previous layer, will be used for next iteration of
-   *         back-propagation.
-   */
-  private double[] backpropagate(int curLayerIdx, double[] nextLayerDelta,
-      List<double[]> outputCache, DenseDoubleMatrix[] weightUpdateMatrices) {
-    int prevLayerIdx = curLayerIdx - 1;
-    double[] delta = new double[this.layerSizeArray[curLayerIdx]];
-    double[] curLayerOutput = outputCache.get(curLayerIdx);
-    double[] prevLayerOutput = outputCache.get(prevLayerIdx);
-
-    // DenseDoubleMatrix prevWeightUpdateMatrix = this.prevWeightUpdateMatrices[curLayerIdx - 1];
-    // for each neuron j in nextLayer, calculate the delta
-    for (int j = 0; j < delta.length; ++j) {
-      // aggregate delta from next layer
-      for (int k = 0; k < nextLayerDelta.length; ++k) {
-        double weight = this.weightMatrice[curLayerIdx].get(j, k);
-        delta[j] += weight * nextLayerDelta[k];
-      }
-      delta[j] *= this.squashingFunction.applyDerivative(curLayerOutput[j + 1]);
-
-      // calculate the weight update matrix between the previous layer and the
-      // current layer
-      for (int i = 0; i < weightUpdateMatrices[prevLayerIdx].getRowCount(); ++i) {
-        double updatedValue = -this.learningRate * delta[j]
-            * prevLayerOutput[i];
-        // add momentum
-        // updatedValue += this.momentum * prevWeightUpdateMatrix.get(i, j);
-        weightUpdateMatrices[prevLayerIdx].set(i, j, updatedValue);
-      }
-    }
-
-    return delta;
-  }
-
-  @Override
-  /**
-   * {@inheritDoc}
-   */
-  public void train(Path dataInputPath, Map<String, String> trainingParams)
-      throws IOException, InterruptedException, ClassNotFoundException {
-    // create the BSP training job
-    Configuration conf = new Configuration();
-    for (Map.Entry<String, String> entry : trainingParams.entrySet()) {
-      conf.set(entry.getKey(), entry.getValue());
-    }
-
-    // put model related parameters
-    if (modelPath == null || modelPath.trim().length() == 0) { // build model
-                                                               // from scratch
-      conf.set("MLPType", this.MLPType);
-      conf.set("learningRate", "" + this.learningRate);
-      conf.set("regularization", "" + this.regularization);
-      conf.set("momentum", "" + this.momentum);
-      conf.set("squashingFunctionName", this.squashingFunctionName);
-      conf.set("costFunctionName", this.costFunctionName);
-      StringBuilder layerSizeArraySb = new StringBuilder();
-      for (int layerSize : this.layerSizeArray) {
-        layerSizeArraySb.append(layerSize);
-        layerSizeArraySb.append(' ');
-      }
-      conf.set("layerSizeArray", layerSizeArraySb.toString());
-    }
-
-    HamaConfiguration hamaConf = new HamaConfiguration(conf);
-
-    BSPJob job = new BSPJob(hamaConf, SmallMLPTrainer.class);
-    job.setJobName("Small scale MLP training");
-    job.setJarByClass(SmallMLPTrainer.class);
-    job.setBspClass(SmallMLPTrainer.class);
-    job.setInputPath(dataInputPath);
-    job.setInputFormat(org.apache.hama.bsp.SequenceFileInputFormat.class);
-    job.setInputKeyClass(LongWritable.class);
-    job.setInputValueClass(VectorWritable.class);
-    job.setOutputKeyClass(NullWritable.class);
-    job.setOutputValueClass(NullWritable.class);
-    job.setOutputFormat(org.apache.hama.bsp.NullOutputFormat.class);
-
-    int numTasks = conf.getInt("tasks", 1);
-    job.setNumBspTask(numTasks);
-    job.waitForCompletion(true);
-
-    // reload learned model
-    Log.info(String.format("Reload model from %s.",
-        trainingParams.get("modelPath")));
-    this.modelPath = trainingParams.get("modelPath");
-    this.readFromModel();
-  }
-
-  @SuppressWarnings("rawtypes")
-  @Override
-  public void readFields(DataInput input) throws IOException {
-    this.MLPType = WritableUtils.readString(input);
-    this.learningRate = input.readDouble();
-    this.regularization = input.readDouble();
-    this.momentum = input.readDouble();
-    this.numberOfLayers = input.readInt();
-    this.squashingFunctionName = WritableUtils.readString(input);
-    this.costFunctionName = WritableUtils.readString(input);
-
-    this.squashingFunction = FunctionFactory
-        .createDoubleFunction(this.squashingFunctionName);
-    this.costFunction = FunctionFactory
-        .createDoubleDoubleFunction(this.costFunctionName);
-
-    // read the number of neurons for each layer
-    this.layerSizeArray = new int[this.numberOfLayers];
-    for (int i = 0; i < numberOfLayers; ++i) {
-      this.layerSizeArray[i] = input.readInt();
-    }
-    this.weightMatrice = new DenseDoubleMatrix[this.numberOfLayers - 1];
-    for (int i = 0; i < numberOfLayers - 1; ++i) {
-      this.weightMatrice[i] = (DenseDoubleMatrix) MatrixWritable.read(input);
-    }
-
-    // read feature transformer
-    int bytesLen = input.readInt();
-    byte[] featureTransformerBytes = new byte[bytesLen];
-    for (int i = 0; i < featureTransformerBytes.length; ++i) {
-      featureTransformerBytes[i] = input.readByte();
-    }
-    Class featureTransformerCls = (Class) SerializationUtils
-        .deserialize(featureTransformerBytes);
-    Constructor constructor = featureTransformerCls.getConstructors()[0];
-    try {
-      this.featureTransformer = (FeatureTransformer) constructor
-          .newInstance(new Object[] {});
-    } catch (InstantiationException e) {
-      e.printStackTrace();
-    } catch (IllegalAccessException e) {
-      e.printStackTrace();
-    } catch (IllegalArgumentException e) {
-      e.printStackTrace();
-    } catch (InvocationTargetException e) {
-      e.printStackTrace();
-    }
-  }
-
-  @Override
-  public void write(DataOutput output) throws IOException {
-    WritableUtils.writeString(output, MLPType);
-    output.writeDouble(learningRate);
-    output.writeDouble(regularization);
-    output.writeDouble(momentum);
-    output.writeInt(numberOfLayers);
-    WritableUtils.writeString(output, squashingFunctionName);
-    WritableUtils.writeString(output, costFunctionName);
-
-    // write the number of neurons for each layer
-    for (int i = 0; i < this.numberOfLayers; ++i) {
-      output.writeInt(this.layerSizeArray[i]);
-    }
-    for (int i = 0; i < numberOfLayers - 1; ++i) {
-      MatrixWritable matrixWritable = new MatrixWritable(this.weightMatrice[i]);
-      matrixWritable.write(output);
-    }
-
-    // serialize the feature transformer
-    Class<? extends FeatureTransformer> featureTransformerCls = this.featureTransformer
-        .getClass();
-    byte[] featureTransformerBytes = SerializationUtils
-        .serialize(featureTransformerCls);
-    output.writeInt(featureTransformerBytes.length);
-    output.write(featureTransformerBytes);
-  }
-
-  /**
-   * Read the model meta-data from the specified location.
-   * 
-   * @throws IOException
-   */
-  @Override
-  protected void readFromModel() throws IOException {
-    Configuration conf = new Configuration();
-    try {
-      URI uri = new URI(modelPath);
-      FileSystem fs = FileSystem.get(uri, conf);
-      FSDataInputStream is = new FSDataInputStream(fs.open(new Path(modelPath)));
-      this.readFields(is);
-      if (!this.MLPType.equals(this.getClass().getName())) {
-        throw new IllegalStateException(String.format(
-            "Model type incorrect, cannot load model '%s' for '%s'.",
-            this.MLPType, this.getClass().getName()));
-      }
-    } catch (URISyntaxException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Write the model to file.
-   * 
-   * @throws IOException
-   */
-  @Override
-  public void writeModelToFile(String modelPath) throws IOException {
-    Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
-    FSDataOutputStream stream = fs.create(new Path(modelPath), true);
-    this.write(stream);
-    stream.close();
-  }
-
-  DenseDoubleMatrix[] getWeightMatrices() {
-    return this.weightMatrice;
-  }
-
-  DenseDoubleMatrix[] getPrevWeightUpdateMatrices() {
-    return this.prevWeightUpdateMatrices;
-  }
-
-  void setWeightMatrices(DenseDoubleMatrix[] newMatrices) {
-    this.weightMatrice = newMatrices;
-  }
-
-  void setPrevWeightUpdateMatrices(
-      DenseDoubleMatrix[] newPrevWeightUpdateMatrices) {
-    this.prevWeightUpdateMatrices = newPrevWeightUpdateMatrices;
-  }
-
-  /**
-   * Update the weight matrices with given updates.
-   * 
-   * @param updateMatrices The updates weights in matrix format.
-   */
-  void updateWeightMatrices(DenseDoubleMatrix[] updateMatrices) {
-    for (int m = 0; m < this.weightMatrice.length; ++m) {
-      this.weightMatrice[m] = (DenseDoubleMatrix) this.weightMatrice[m]
-          .add(updateMatrices[m]);
-    }
-  }
-
-  /**
-   * Print out the weights.
-   * 
-   * @param mat
-   * @return the weights value.
-   */
-  static String weightsToString(DenseDoubleMatrix[] mat) {
-    StringBuilder sb = new StringBuilder();
-
-    for (int i = 0; i < mat.length; ++i) {
-      sb.append(String.format("Matrix [%d]\n", i));
-      double[][] values = mat[i].getValues();
-      for (double[] value : values) {
-        sb.append(Arrays.toString(value));
-        sb.append('\n');
-      }
-      sb.append('\n');
-    }
-    return sb.toString();
-  }
-
-  @Override
-  protected String getTypeName() {
-    return this.getClass().getName();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java
----------------------------------------------------------------------
diff --git a/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java b/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java
deleted file mode 100644
index 50e3b08..0000000
--- a/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hama.ml.regression;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hama.commons.math.DoubleMatrix;
-import org.apache.hama.commons.math.DoubleVector;
-import org.apache.hama.commons.math.FunctionFactory;
-import org.apache.hama.ml.ann.SmallLayeredNeuralNetwork;
-import org.apache.hama.ml.util.FeatureTransformer;
-
-/**
- * Linear regression model. It can be used for numeric regression or prediction.
- * 
- */
-public class LinearRegression {
-
-  /* Internal model */
-  private final SmallLayeredNeuralNetwork ann;
-
-  public LinearRegression(int dimension) {
-    ann = new SmallLayeredNeuralNetwork();
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.addLayer(1, true,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-  }
-
-  public LinearRegression(String modelPath) {
-    ann = new SmallLayeredNeuralNetwork(modelPath);
-  }
-
-  /**
-   * Set the learning rate; the recommended range is (0, 0.01]. Note that
-   * linear regression diverges easily if the learning rate is not small
-   * enough.
-   * 
-   * @param learningRate
-   */
-  public LinearRegression setLearningRate(double learningRate) {
-    ann.setLearningRate(learningRate);
-    return this;
-  }
-
-  /**
-   * Get the learning rate.
-   */
-  public double getLearningRate() {
-    return ann.getLearningRate();
-  }
-
-  /**
-   * Set the weight of the momentum; the recommended range is [0, 1.0]. Too
-   * large a momentum weight may make the model hard to converge.
-   * 
-   * @param momemtumWeight
-   */
-  public LinearRegression setMomemtumWeight(double momemtumWeight) {
-    ann.setMomemtumWeight(momemtumWeight);
-    return this;
-  }
-
-  /**
-   * Get the weight of momentum.
-   * 
-   * @return the momentum weight value.
-   */
-  public double getMomemtumWeight() {
-    return ann.getMomemtumWeight();
-  }
-
-  /**
-   * Set the weight of regularization; the recommended range is [0, 0.1]. Too
-   * large a regularization weight will mislead the model.
-   * 
-   * @param regularizationWeight
-   */
-  public LinearRegression setRegularizationWeight(double regularizationWeight) {
-    ann.setRegularizationWeight(regularizationWeight);
-    return this;
-  }
-
-  /**
-   * Get the weight of regularization.
-   * 
-   * @return the regularization weight value.
-   */
-  public double getRegularizationWeight() {
-    return ann.getRegularizationWeight();
-  }
-
-  /**
-   * Train the linear regression model with one instance. It is HIGHLY
-   * RECOMMENDED to normalize the data first.
-   * 
-   * @param trainingInstance
-   */
-  public void trainOnline(DoubleVector trainingInstance) {
-    // ann.trainOnline(trainingInstance);
-    DoubleMatrix[] updates = ann.trainByInstance(trainingInstance);
-    // System.out.printf("%s\n", updates[0]);
-    ann.updateWeightMatrices(updates);
-  }
-
-  /**
-   * Train the model with given data. It is HIGHLY RECOMMENDED to normalize the
-   * data first.
-   * 
-   * @param dataInputPath The file path that contains the training instance.
-   * @param trainingParams The training parameters.
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   */
-  public void train(Path dataInputPath, Map<String, String> trainingParams) {
-    ann.train(dataInputPath, trainingParams);
-  }
-
-  /**
-   * Get the output according to given input instance.
-   * 
-   * @param instance
-   * @return a new vector with the result of the operation.
-   */
-  public DoubleVector getOutput(DoubleVector instance) {
-    return ann.getOutput(instance);
-  }
-
-  /**
-   * Set the path to store the model. Note that this only sets the path; it
-   * does not save the model. Call writeModelToFile to save the model.
-   * 
-   * @param modelPath
-   */
-  public void setModelPath(String modelPath) {
-    ann.setModelPath(modelPath);
-  }
-
-  /**
-   * Save the model to specified model path.
-   */
-  public void writeModelToFile() {
-    try {
-      ann.writeModelToFile();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Get the weights of the model.
-   * 
-   * @return a new vector with the weights of the model.
-   */
-  public DoubleVector getWeights() {
-    return ann.getWeightsByLayer(0).getRowVector(0);
-  }
-  
-  /**
-   * Set the feature transformer.
-   * @param featureTransformer
-   */
-  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
-    this.ann.setFeatureTransformer(featureTransformer);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java
----------------------------------------------------------------------
diff --git a/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java b/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java
deleted file mode 100644
index dd990c7..0000000
--- a/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hama.ml.regression;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hama.commons.math.DoubleVector;
-import org.apache.hama.commons.math.FunctionFactory;
-import org.apache.hama.ml.ann.SmallLayeredNeuralNetwork;
-import org.apache.hama.ml.util.FeatureTransformer;
-
-import java.io.IOException;
-import java.util.Map;
-
-/**
- * The logistic regression model. It can be used to conduct 2-class
- * classification.
- * 
- */
-public class LogisticRegression {
-  
-  private final SmallLayeredNeuralNetwork ann;
-  
-  public LogisticRegression(int dimension) {
-    this.ann = new SmallLayeredNeuralNetwork();
-    this.ann.addLayer(dimension, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    this.ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    this.ann.setCostFunction(FunctionFactory.createDoubleDoubleFunction("CrossEntropy"));
-  }
-  
-  public LogisticRegression(String modelPath) {
-    this.ann = new SmallLayeredNeuralNetwork(modelPath);
-  }
-  
-  /**
-   * Set the learning rate; the recommended range is (0, 0.01]. Note that
-   * the model can easily diverge if the learning rate is not small
-   * enough.
-   * 
-   * @param learningRate
-   */
-  public LogisticRegression setLearningRate(double learningRate) {
-    ann.setLearningRate(learningRate);
-    return this;
-  }
-
-  /**
-   * Get the learning rate.
-   */
-  public double getLearningRate() {
-    return ann.getLearningRate();
-  }
-
-  /**
-   * Set the weight of the momentum; the recommended range is [0, 1.0]. Too
-   * large a momentum weight may make the model hard to converge.
-   * 
-   * @param momemtumWeight
-   */
-  public LogisticRegression setMomemtumWeight(double momemtumWeight) {
-    ann.setMomemtumWeight(momemtumWeight);
-    return this;
-  }
-
-  /**
-   * Get the weight of momentum.
-   * 
-   * @return the momentum weight value.
-   */
-  public double getMomemtumWeight() {
-    return ann.getMomemtumWeight();
-  }
-
-  /**
-   * Set the weight of regularization; the recommended range is [0, 0.1]. Too
-   * large a regularization weight will mislead the model.
-   * 
-   * @param regularizationWeight
-   */
-  public LogisticRegression setRegularizationWeight(double regularizationWeight) {
-    ann.setRegularizationWeight(regularizationWeight);
-    return this;
-  }
-
-  /**
-   * Get the weight of regularization.
-   * 
-   * @return the regularization weight value.
-   */
-  public double getRegularizationWeight() {
-    return ann.getRegularizationWeight();
-  }
-
-  /**
-   * Train the logistic regression model with one instance. It is HIGHLY
-   * RECOMMENDED to normalize the data first.
-   * 
-   * @param trainingInstance
-   */
-  public void trainOnline(DoubleVector trainingInstance) {
-    ann.trainOnline(trainingInstance);
-  }
-
-  /**
-   * Train the model with given data. It is HIGHLY RECOMMENDED to normalize the
-   * data first.
-   * 
-   * @param dataInputPath The file path that contains the training instance.
-   * @param trainingParams The training parameters.
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
-   */
-  public void train(Path dataInputPath, Map<String, String> trainingParams) {
-    ann.train(dataInputPath, trainingParams);
-  }
-
-  /**
-   * Get the output according to given input instance.
-   * 
-   * @param instance
-   * @return a new vector with the result of the operation.
-   */
-  public DoubleVector getOutput(DoubleVector instance) {
-    return ann.getOutput(instance);
-  }
-
-  /**
-   * Set the path to store the model. Note that this only sets the path; it
-   * does not save the model. Call writeModelToFile to save the model.
-   * 
-   * @param modelPath
-   */
-  public void setModelPath(String modelPath) {
-    ann.setModelPath(modelPath);
-  }
-
-  /**
-   * Save the model to specified model path.
-   */
-  public void writeModelToFile() {
-    try {
-      ann.writeModelToFile();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Get the weights of the model.
-   * 
-   * @return a new vector with the weights of the model.
-   */
-  public DoubleVector getWeights() {
-    return ann.getWeightsByLayer(0).getRowVector(0);
-  }
-
-  /**
-   * Set the feature transformer.
-   * @param featureTransformer
-   */
-  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
-    this.ann.setFeatureTransformer(featureTransformer);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java
----------------------------------------------------------------------
diff --git a/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java b/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java
deleted file mode 100644
index 0077cb0..0000000
--- a/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java
+++ /dev/null
@@ -1,195 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hama.ml.ann;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hama.commons.io.VectorWritable;
-import org.apache.hama.commons.math.DenseDoubleVector;
-import org.apache.hama.commons.math.DoubleVector;
-import org.apache.hama.ml.MLTestBase;
-import org.junit.Test;
-import org.mortbay.log.Log;
-
-/**
- * Test the functionality of {@link AutoEncoder}.
- * 
- */
-public class TestAutoEncoder extends MLTestBase {
-
-  @Test
-  public void testAutoEncoderSimple() {
-    double[][] instances = { { 0, 0, 0, 1 }, { 0, 0, 1, 0 }, { 0, 1, 0, 0 },
-        { 0, 0, 0, 0 } };
-    AutoEncoder encoder = new AutoEncoder(4, 2);
-    encoder.setLearningRate(0.5);
-    encoder.setMomemtumWeight(0.2);
-    
-    int maxIteration = 2000;
-    Random rnd = new Random();
-    for (int iteration = 0; iteration < maxIteration; ++iteration) {
-      for (int i = 0; i < instances.length; ++i) {
-        encoder.trainOnline(new DenseDoubleVector(instances[rnd.nextInt(instances.length)]));
-      }
-    }
-
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector encodeVec = encoder.encode(new DenseDoubleVector(
-          instances[i]));
-      DoubleVector decodeVec = encoder.decode(encodeVec);
-      for (int d = 0; d < instances[i].length; ++d) {
-        assertEquals(instances[i][d], decodeVec.get(d), 0.1);
-      }
-    }
-
-  }
-  
-  @Test
-  public void testAutoEncoderSwissRollDataset() {
-    List<double[]> instanceList = new ArrayList<double[]>();
-    try {
-      BufferedReader br = new BufferedReader(new FileReader("src/test/resources/dimensional_reduction.txt"));
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        String[] tokens = line.split("\t");
-        double[] instance = new double[tokens.length];
-        for (int i = 0; i < instance.length; ++i) {
-          instance[i] = Double.parseDouble(tokens[i]);
-        }
-        instanceList.add(instance);
-      }
-      br.close();
-      // normalize instances
-      zeroOneNormalization(instanceList, instanceList.get(0).length);
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (NumberFormatException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-    
-    List<DoubleVector> vecInstanceList = new ArrayList<DoubleVector>();
-    for (double[] instance : instanceList) {
-      vecInstanceList.add(new DenseDoubleVector(instance));
-    }
-    AutoEncoder encoder = new AutoEncoder(3, 2);
-    encoder.setLearningRate(0.05);
-    encoder.setMomemtumWeight(0.1);
-    int maxIteration = 2000;
-    for (int iteration = 0; iteration < maxIteration; ++iteration) {
-      for (DoubleVector vector : vecInstanceList) {
-        encoder.trainOnline(vector);
-      }
-    }
-
-    double errorInstance = 0;
-    for (DoubleVector vector : vecInstanceList) {
-      DoubleVector decoded = encoder.getOutput(vector);
-      DoubleVector diff = vector.subtract(decoded);
-      double error = diff.dot(diff);
-      if (error > 0.1) {
-        ++errorInstance;
-      }
-    }
-    Log.info(String.format("Autoecoder error rate: %f%%\n", errorInstance * 100 / vecInstanceList.size()));
-    
-  }
-  
-  @Test
-  public void testAutoEncoderSwissRollDatasetDistributed() {
-    String strDataPath = "/tmp/dimensional_reduction.txt";
-    Path path = new Path(strDataPath);
-    List<double[]> instanceList = new ArrayList<double[]>();
-    try {
-      Configuration conf = new Configuration();
-      FileSystem fs = FileSystem.get(new URI(strDataPath), conf);
-      if (fs.exists(path)) {
-        fs.delete(path, true);
-      }
-      
-      String line = null;
-      BufferedReader br = new BufferedReader(new FileReader("src/test/resources/dimensional_reduction.txt"));
-      while ((line = br.readLine()) != null) {
-        String[] tokens = line.split("\t");
-        double[] instance = new double[tokens.length];
-        for (int i = 0; i < instance.length; ++i) {
-          instance[i] = Double.parseDouble(tokens[i]);
-        }
-        instanceList.add(instance);
-      }
-      br.close();
-      // normalize instances
-      zeroOneNormalization(instanceList, instanceList.get(0).length);
-      
-      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class);
-      for (int i = 0; i < instanceList.size(); ++i) {
-        DoubleVector vector = new DenseDoubleVector(instanceList.get(i));
-        writer.append(new LongWritable(i), new VectorWritable(vector));
-      }
-      
-      writer.close();
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (URISyntaxException e) {
-      e.printStackTrace();
-    }
-    
-    AutoEncoder encoder = new AutoEncoder(3, 2);
-    String modelPath = "/tmp/autoencoder-modelpath";
-    encoder.setModelPath(modelPath);
-    Map<String, String> trainingParams = new HashMap<String, String>();
-    encoder.setLearningRate(0.5);
-    trainingParams.put("tasks", "5");
-    trainingParams.put("training.max.iterations", "3000");
-    trainingParams.put("training.batch.size", "200");
-    encoder.train(path, trainingParams);
-    
-    double errorInstance = 0;
-    for (double[] instance : instanceList) {
-      DoubleVector vector = new DenseDoubleVector(instance);
-      DoubleVector decoded = encoder.getOutput(vector);
-      DoubleVector diff = vector.subtract(decoded);
-      double error = diff.dot(diff);
-      if (error > 0.1) {
-        ++errorInstance;
-      }
-    }
-    Log.info(String.format("Autoecoder error rate: %f%%\n", errorInstance * 100 / instanceList.size()));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java
----------------------------------------------------------------------
diff --git a/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java b/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java
deleted file mode 100644
index 8ad88af..0000000
--- a/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java
+++ /dev/null
@@ -1,643 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hama.ml.ann;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hama.commons.io.VectorWritable;
-import org.apache.hama.commons.math.DenseDoubleMatrix;
-import org.apache.hama.commons.math.DenseDoubleVector;
-import org.apache.hama.commons.math.DoubleMatrix;
-import org.apache.hama.commons.math.DoubleVector;
-import org.apache.hama.commons.math.FunctionFactory;
-import org.apache.hama.ml.MLTestBase;
-import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.LearningStyle;
-import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.TrainingMethod;
-import org.apache.hama.ml.util.DefaultFeatureTransformer;
-import org.apache.hama.ml.util.FeatureTransformer;
-import org.junit.Test;
-import org.mortbay.log.Log;
-
-/**
- * Test the functionality of SmallLayeredNeuralNetwork.
- * 
- */
-public class TestSmallLayeredNeuralNetwork extends MLTestBase {
-
-  @Test
-  public void testReadWrite() {
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.addLayer(2, false,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.addLayer(5, false,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.addLayer(1, true,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    double learningRate = 0.2;
-    ann.setLearningRate(learningRate);
-    double momentumWeight = 0.5;
-    ann.setMomemtumWeight(momentumWeight);
-    double regularizationWeight = 0.05;
-    ann.setRegularizationWeight(regularizationWeight);
-    // intentionally initialize the weights to fixed values (0.2 and 0.8)
-    DoubleMatrix[] matrices = new DenseDoubleMatrix[2];
-    matrices[0] = new DenseDoubleMatrix(5, 3, 0.2);
-    matrices[1] = new DenseDoubleMatrix(1, 6, 0.8);
-    ann.setWeightMatrices(matrices);
-    ann.setLearningStyle(LearningStyle.UNSUPERVISED);
-    
-    FeatureTransformer defaultFeatureTransformer = new DefaultFeatureTransformer();
-    ann.setFeatureTransformer(defaultFeatureTransformer);
-    
-
-    // write to file
-    String modelPath = "/tmp/testSmallLayeredNeuralNetworkReadWrite";
-    ann.setModelPath(modelPath);
-    try {
-      ann.writeModelToFile();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-
-    // read from file
-    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
-    assertEquals(annCopy.getClass().getSimpleName(), annCopy.getModelType());
-    assertEquals(modelPath, annCopy.getModelPath());
-    assertEquals(learningRate, annCopy.getLearningRate(), 0.000001);
-    assertEquals(momentumWeight, annCopy.getMomemtumWeight(), 0.000001);
-    assertEquals(regularizationWeight, annCopy.getRegularizationWeight(),
-        0.000001);
-    assertEquals(TrainingMethod.GRADIENT_DESCENT, annCopy.getTrainingMethod());
-    assertEquals(LearningStyle.UNSUPERVISED, annCopy.getLearningStyle());
-
-    // compare weights
-    DoubleMatrix[] weightsMatrices = annCopy.getWeightMatrices();
-    for (int i = 0; i < weightsMatrices.length; ++i) {
-      DoubleMatrix expectMat = matrices[i];
-      DoubleMatrix actualMat = weightsMatrices[i];
-      for (int j = 0; j < expectMat.getRowCount(); ++j) {
-        for (int k = 0; k < expectMat.getColumnCount(); ++k) {
-          assertEquals(expectMat.get(j, k), actualMat.get(j, k), 0.000001);
-        }
-      }
-    }
-    
-    FeatureTransformer copyTransformer = annCopy.getFeatureTransformer();
-    assertEquals(defaultFeatureTransformer.getClass().getName(), copyTransformer.getClass().getName());
-  }
-
-  @Test
-  /**
-   * Test the forward functionality.
-   */
-  public void testOutput() {
-    // first network
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.addLayer(2, false,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.addLayer(5, false,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.addLayer(1, true,
-        FunctionFactory.createDoubleFunction("IdentityFunction"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    ann.setLearningRate(0.1);
-    // intentionally initialize all weights to 0.5
-    DoubleMatrix[] matrices = new DenseDoubleMatrix[2];
-    matrices[0] = new DenseDoubleMatrix(5, 3, 0.5);
-    matrices[1] = new DenseDoubleMatrix(1, 6, 0.5);
-    ann.setWeightMatrices(matrices);
-
-    double[] arr = new double[] { 0, 1 };
-    DoubleVector training = new DenseDoubleVector(arr);
-    DoubleVector result = ann.getOutput(training);
-    assertEquals(1, result.getDimension());
-    // assertEquals(3, result.get(0), 0.000001);
-
-    // second network
-    SmallLayeredNeuralNetwork ann2 = new SmallLayeredNeuralNetwork();
-    ann2.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann2.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann2.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann2.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    ann2.setLearningRate(0.3);
-    // intentionally initialize all weights to 0.5
-    DoubleMatrix[] matrices2 = new DenseDoubleMatrix[2];
-    matrices2[0] = new DenseDoubleMatrix(3, 3, 0.5);
-    matrices2[1] = new DenseDoubleMatrix(1, 4, 0.5);
-    ann2.setWeightMatrices(matrices2);
-
-    double[] test = { 0, 0 };
-    double[] result2 = { 0.807476 };
-
-    DoubleVector vec = ann2.getOutput(new DenseDoubleVector(test));
-    assertArrayEquals(result2, vec.toArray(), 0.000001);
-
-    SmallLayeredNeuralNetwork ann3 = new SmallLayeredNeuralNetwork();
-    ann3.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann3.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann3.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann3.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    ann3.setLearningRate(0.3);
-    // intentionally initialize all weights to 0.5
-    DoubleMatrix[] initMatrices = new DenseDoubleMatrix[2];
-    initMatrices[0] = new DenseDoubleMatrix(3, 3, 0.5);
-    initMatrices[1] = new DenseDoubleMatrix(1, 4, 0.5);
-    ann3.setWeightMatrices(initMatrices);
-
-    double[] instance = { 0, 1 };
-    DoubleVector output = ann3.getOutput(new DenseDoubleVector(instance));
-    assertEquals(0.8315410, output.get(0), 0.000001);
-  }
-
-  @Test
-  public void testXORlocal() {
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    ann.setLearningRate(0.5);
-    ann.setMomemtumWeight(0.0);
-
-    int iterations = 50000; // iteration should be set to a very large number
-    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
-    for (int i = 0; i < iterations; ++i) {
-      DoubleMatrix[] matrices = null;
-      for (int j = 0; j < instances.length; ++j) {
-        matrices = ann.trainByInstance(new DenseDoubleVector(instances[j
-            % instances.length]));
-        ann.updateWeightMatrices(matrices);
-      }
-    }
-
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
-      // the expected output is the last element in array
-      double result = instances[i][2];
-      double actual = ann.getOutput(input).get(0);
-      if (result < 0.5 && actual >= 0.5 || result >= 0.5 && actual < 0.5) {
-        Log.info("Neural network failes to lear the XOR.");
-      }
-    }
-
-    // write model into file and read out
-    String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocal";
-    ann.setModelPath(modelPath);
-    try {
-      ann.writeModelToFile();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
-    // test on instances
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
-      // the expected output is the last element in array
-      double result = instances[i][2];
-      double actual = annCopy.getOutput(input).get(0);
-      if (result < 0.5 && actual >= 0.5 || result >= 0.5 && actual < 0.5) {
-        Log.info("Neural network failes to lear the XOR.");
-      }
-    }
-  }
-
-  @Test
-  public void testXORWithMomentum() {
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    ann.setLearningRate(0.6);
-    ann.setMomemtumWeight(0.3);
-
-    int iterations = 2000; // iteration should be set to a very large number
-    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
-    for (int i = 0; i < iterations; ++i) {
-      for (int j = 0; j < instances.length; ++j) {
-        ann.trainOnline(new DenseDoubleVector(instances[j % instances.length]));
-      }
-    }
-
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
-      // the expected output is the last element in array
-      double result = instances[i][2];
-      double actual = ann.getOutput(input).get(0);
-      if (result < 0.5 && actual >= 0.5 || result >= 0.5 && actual < 0.5) {
-        Log.info("Neural network failes to lear the XOR.");
-      }
-    }
-
-    // write model into file and read out
-    String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocalWithMomentum";
-    ann.setModelPath(modelPath);
-    try {
-      ann.writeModelToFile();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
-    // test on instances
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
-      // the expected output is the last element in array
-      double result = instances[i][2];
-      double actual = annCopy.getOutput(input).get(0);
-      if (result < 0.5 && actual >= 0.5 || result >= 0.5 && actual < 0.5) {
-        Log.info("Neural network failes to lear the XOR.");
-      }
-    }
-  }
-
-  @Test
-  public void testXORLocalWithRegularization() {
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.addLayer(2, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(3, false, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("SquaredError"));
-    ann.setLearningRate(0.7);
-    ann.setMomemtumWeight(0.5);
-    ann.setRegularizationWeight(0.002);
-
-    int iterations = 5000; // iteration should be set to a very large number
-    double[][] instances = { { 0, 1, 1 }, { 0, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 } };
-    for (int i = 0; i < iterations; ++i) {
-      for (int j = 0; j < instances.length; ++j) {
-        ann.trainOnline(new DenseDoubleVector(instances[j % instances.length]));
-      }
-    }
-
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
-      // the expected output is the last element in array
-      double result = instances[i][2];
-      double actual = ann.getOutput(input).get(0);
-      if (result < 0.5 && actual >= 0.5 || result >= 0.5 && actual < 0.5) {
-        Log.info("Neural network failes to lear the XOR.");
-      }
-    }
-
-    // write model into file and read out
-    String modelPath = "/tmp/testSmallLayeredNeuralNetworkXORLocalWithRegularization";
-    ann.setModelPath(modelPath);
-    try {
-      ann.writeModelToFile();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-    SmallLayeredNeuralNetwork annCopy = new SmallLayeredNeuralNetwork(modelPath);
-    // test on instances
-    for (int i = 0; i < instances.length; ++i) {
-      DoubleVector input = new DenseDoubleVector(instances[i]).slice(2);
-      // the expected output is the last element in array
-      double result = instances[i][2];
-      double actual = annCopy.getOutput(input).get(0);
-      if (result < 0.5 && actual >= 0.5 || result >= 0.5 && actual < 0.5) {
-        Log.info("Neural network failes to lear the XOR.");
-      }
-    }
-  }
-
-  @Test
-  public void testTwoClassClassification() {
-    // use logistic regression data
-    String filepath = "src/test/resources/logistic_regression_data.txt";
-    List<double[]> instanceList = new ArrayList<double[]>();
-
-    try {
-      BufferedReader br = new BufferedReader(new FileReader(filepath));
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        String[] tokens = line.trim().split(",");
-        double[] instance = new double[tokens.length];
-        for (int i = 0; i < tokens.length; ++i) {
-          instance[i] = Double.parseDouble(tokens[i]);
-        }
-        instanceList.add(instance);
-      }
-      br.close();
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-
-    zeroOneNormalization(instanceList, instanceList.get(0).length - 1);
-    
-    int dimension = instanceList.get(0).length - 1;
-
-    // divide dataset into training and testing
-    List<double[]> testInstances = new ArrayList<double[]>();
-    testInstances.addAll(instanceList.subList(instanceList.size() - 100,
-        instanceList.size()));
-    List<double[]> trainingInstances = instanceList.subList(0,
-        instanceList.size() - 100);
-
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.setLearningRate(0.001);
-    ann.setMomemtumWeight(0.1);
-    ann.setRegularizationWeight(0.01);
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("CrossEntropy"));
-
-    long start = new Date().getTime();
-    int iterations = 1000;
-    for (int i = 0; i < iterations; ++i) {
-      for (double[] trainingInstance : trainingInstances) {
-        ann.trainOnline(new DenseDoubleVector(trainingInstance));
-      }
-    }
-    long end = new Date().getTime();
-    Log.info(String.format("Training time: %fs\n",
-        (double) (end - start) / 1000));
-
-    double errorRate = 0;
-    // calculate the error on test instance
-    for (double[] testInstance : testInstances) {
-      DoubleVector instance = new DenseDoubleVector(testInstance);
-      double expected = instance.get(instance.getDimension() - 1);
-      instance = instance.slice(instance.getDimension() - 1);
-      double actual = ann.getOutput(instance).get(0);
-      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
-        ++errorRate;
-      }
-    }
-    errorRate /= testInstances.size();
-
-    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
-  }
-  
-  @Test
-  public void testLogisticRegression() {
-    this.testLogisticRegressionDistributedVersion();
-    this.testLogisticRegressionDistributedVersionWithFeatureTransformer();
-  }
-
-  public void testLogisticRegressionDistributedVersion() {
-    // write data into a sequence file
-    String tmpStrDatasetPath = "/tmp/logistic_regression_data";
-    Path tmpDatasetPath = new Path(tmpStrDatasetPath);
-    String strDataPath = "src/test/resources/logistic_regression_data.txt";
-    String modelPath = "/tmp/logistic-regression-distributed-model";
-
-    Configuration conf = new Configuration();
-    List<double[]> instanceList = new ArrayList<double[]>();
-    List<double[]> trainingInstances = null;
-    List<double[]> testInstances = null;
-
-    try {
-      FileSystem fs = FileSystem.get(new URI(tmpStrDatasetPath), conf);
-      fs.delete(tmpDatasetPath, true);
-      if (fs.exists(tmpDatasetPath)) {
-        fs.createNewFile(tmpDatasetPath);
-      }
-
-      BufferedReader br = new BufferedReader(new FileReader(strDataPath));
-      String line = null;
-      int count = 0;
-      while ((line = br.readLine()) != null) {
-        String[] tokens = line.trim().split(",");
-        double[] instance = new double[tokens.length];
-        for (int i = 0; i < tokens.length; ++i) {
-          instance[i] = Double.parseDouble(tokens[i]);
-        }
-        instanceList.add(instance);
-      }
-      br.close();
-      
-      zeroOneNormalization(instanceList, instanceList.get(0).length - 1);
-      
-      // write training data to temporal sequence file
-      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
-          tmpDatasetPath, LongWritable.class, VectorWritable.class);
-      int testSize = 150;
-
-      Collections.shuffle(instanceList);
-      testInstances = new ArrayList<double[]>();
-      testInstances.addAll(instanceList.subList(instanceList.size() - testSize,
-          instanceList.size()));
-      trainingInstances = instanceList.subList(0, instanceList.size()
-          - testSize);
-
-      for (double[] instance : trainingInstances) {
-        DoubleVector vec = new DenseDoubleVector(instance);
-        writer.append(new LongWritable(count++), new VectorWritable(vec));
-      }
-      writer.close();
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (URISyntaxException e) {
-      e.printStackTrace();
-    }
-
-    // create model
-    int dimension = 8;
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.setLearningRate(0.7);
-    ann.setMomemtumWeight(0.5);
-    ann.setRegularizationWeight(0.1);
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("CrossEntropy"));
-    ann.setModelPath(modelPath);
-
-    long start = new Date().getTime();
-    Map<String, String> trainingParameters = new HashMap<String, String>();
-    trainingParameters.put("tasks", "5");
-    trainingParameters.put("training.max.iterations", "2000");
-    trainingParameters.put("training.batch.size", "300");
-    trainingParameters.put("convergence.check.interval", "1000");
-    ann.train(tmpDatasetPath, trainingParameters);
-
-    long end = new Date().getTime();
-
-    // validate results
-    double errorRate = 0;
-    // calculate the error on test instance
-    for (double[] testInstance : testInstances) {
-      DoubleVector instance = new DenseDoubleVector(testInstance);
-      double expected = instance.get(instance.getDimension() - 1);
-      instance = instance.slice(instance.getDimension() - 1);
-      double actual = ann.getOutput(instance).get(0);
-      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
-        ++errorRate;
-      }
-    }
-    errorRate /= testInstances.size();
-
-    Log.info(String.format("Training time: %fs\n",
-        (double) (end - start) / 1000));
-    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
-  }
-  
-  public void testLogisticRegressionDistributedVersionWithFeatureTransformer() {
-    // write data into a sequence file
-    String tmpStrDatasetPath = "/tmp/logistic_regression_data_feature_transformer";
-    Path tmpDatasetPath = new Path(tmpStrDatasetPath);
-    String strDataPath = "src/test/resources/logistic_regression_data.txt";
-    String modelPath = "/tmp/logistic-regression-distributed-model-feature-transformer";
-
-    Configuration conf = new Configuration();
-    List<double[]> instanceList = new ArrayList<double[]>();
-    List<double[]> trainingInstances = null;
-    List<double[]> testInstances = null;
-
-    try {
-      FileSystem fs = FileSystem.get(new URI(tmpStrDatasetPath), conf);
-      fs.delete(tmpDatasetPath, true);
-      if (fs.exists(tmpDatasetPath)) {
-        fs.createNewFile(tmpDatasetPath);
-      }
-
-      BufferedReader br = new BufferedReader(new FileReader(strDataPath));
-      String line = null;
-      int count = 0;
-      while ((line = br.readLine()) != null) {
-        String[] tokens = line.trim().split(",");
-        double[] instance = new double[tokens.length];
-        for (int i = 0; i < tokens.length; ++i) {
-          instance[i] = Double.parseDouble(tokens[i]);
-        }
-        instanceList.add(instance);
-      }
-      br.close();
-      
-      zeroOneNormalization(instanceList, instanceList.get(0).length - 1);
-      
-      // write training data to temporal sequence file
-      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
-          tmpDatasetPath, LongWritable.class, VectorWritable.class);
-      int testSize = 150;
-
-      Collections.shuffle(instanceList);
-      testInstances = new ArrayList<double[]>();
-      testInstances.addAll(instanceList.subList(instanceList.size() - testSize,
-          instanceList.size()));
-      trainingInstances = instanceList.subList(0, instanceList.size()
-          - testSize);
-
-      for (double[] instance : trainingInstances) {
-        DoubleVector vec = new DenseDoubleVector(instance);
-        writer.append(new LongWritable(count++), new VectorWritable(vec));
-      }
-      writer.close();
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (URISyntaxException e) {
-      e.printStackTrace();
-    }
-
-    // create model
-    int dimension = 8;
-    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
-    ann.setLearningRate(0.7);
-    ann.setMomemtumWeight(0.5);
-    ann.setRegularizationWeight(0.1);
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(dimension, false,
-        FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
-    ann.setCostFunction(FunctionFactory
-        .createDoubleDoubleFunction("CrossEntropy"));
-    ann.setModelPath(modelPath);
-    
-    FeatureTransformer featureTransformer = new DefaultFeatureTransformer();
-    
-    ann.setFeatureTransformer(featureTransformer);
-
-    long start = new Date().getTime();
-    Map<String, String> trainingParameters = new HashMap<String, String>();
-    trainingParameters.put("tasks", "5");
-    trainingParameters.put("training.max.iterations", "2000");
-    trainingParameters.put("training.batch.size", "300");
-    trainingParameters.put("convergence.check.interval", "1000");
-    ann.train(tmpDatasetPath, trainingParameters);
-    
-
-    long end = new Date().getTime();
-
-    // validate results
-    double errorRate = 0;
-    // calculate the error on test instance
-    for (double[] testInstance : testInstances) {
-      DoubleVector instance = new DenseDoubleVector(testInstance);
-      double expected = instance.get(instance.getDimension() - 1);
-      instance = instance.slice(instance.getDimension() - 1);
-      instance = featureTransformer.transform(instance);
-      double actual = ann.getOutput(instance).get(0);
-      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
-        ++errorRate;
-      }
-    }
-    errorRate /= testInstances.size();
-
-    Log.info(String.format("Training time: %fs\n",
-        (double) (end - start) / 1000));
-    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hama/blob/3a3ea7a3/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java
----------------------------------------------------------------------
diff --git a/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java b/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java
deleted file mode 100644
index 148be6e..0000000
--- a/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetworkMessage.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hama.ml.ann;
-
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hama.commons.math.DenseDoubleMatrix;
-import org.apache.hama.commons.math.DoubleMatrix;
-import org.junit.Test;
-
-/**
- * Test the functionalities of SmallLayeredNeuralNetworkMessage.
- * 
- */
-public class TestSmallLayeredNeuralNetworkMessage {
-
-  @Test
-  public void testReadWriteWithoutPrev() {
-    double error = 0.22;
-    double[][] matrix1 = new double[][] { { 0.1, 0.2, 0.8, 0.5 },
-        { 0.3, 0.4, 0.6, 0.2 }, { 0.5, 0.6, 0.1, 0.5 } };
-    double[][] matrix2 = new double[][] { { 0.8, 1.2, 0.5 } };
-    DoubleMatrix[] matrices = new DoubleMatrix[2];
-    matrices[0] = new DenseDoubleMatrix(matrix1);
-    matrices[1] = new DenseDoubleMatrix(matrix2);
-
-    boolean isConverge = false;
-
-    SmallLayeredNeuralNetworkMessage message = new SmallLayeredNeuralNetworkMessage(
-        error, isConverge, matrices, null);
-    Configuration conf = new Configuration();
-    String strPath = "/tmp/testReadWriteSmallLayeredNeuralNetworkMessage";
-    Path path = new Path(strPath);
-    try {
-      FileSystem fs = FileSystem.get(new URI(strPath), conf);
-      FSDataOutputStream out = fs.create(path);
-      message.write(out);
-      out.close();
-
-      FSDataInputStream in = fs.open(path);
-      SmallLayeredNeuralNetworkMessage readMessage = new SmallLayeredNeuralNetworkMessage(
-          0, isConverge, null, null);
-      readMessage.readFields(in);
-      in.close();
-      assertEquals(error, readMessage.getTrainingError(), 0.000001);
-      assertFalse(readMessage.isConverge());
-      DoubleMatrix[] readMatrices = readMessage.getCurMatrices();
-      assertEquals(2, readMatrices.length);
-      for (int i = 0; i < readMatrices.length; ++i) {
-        double[][] doubleMatrices = ((DenseDoubleMatrix) readMatrices[i])
-            .getValues();
-        double[][] doubleExpected = ((DenseDoubleMatrix) matrices[i])
-            .getValues();
-        for (int r = 0; r < doubleMatrices.length; ++r) {
-          assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001);
-        }
-      }
-
-      DoubleMatrix[] readPrevMatrices = readMessage.getPrevMatrices();
-      assertNull(readPrevMatrices);
-
-      // delete
-      fs.delete(path, true);
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (URISyntaxException e) {
-      e.printStackTrace();
-    }
-  }
-
-  @Test
-  public void testReadWriteWithPrev() {
-    double error = 0.22;
-    boolean isConverge = true;
-
-    double[][] matrix1 = new double[][] { { 0.1, 0.2, 0.8, 0.5 },
-        { 0.3, 0.4, 0.6, 0.2 }, { 0.5, 0.6, 0.1, 0.5 } };
-    double[][] matrix2 = new double[][] { { 0.8, 1.2, 0.5 } };
-    DoubleMatrix[] matrices = new DoubleMatrix[2];
-    matrices[0] = new DenseDoubleMatrix(matrix1);
-    matrices[1] = new DenseDoubleMatrix(matrix2);
-
-    double[][] prevMatrix1 = new double[][] { { 0.1, 0.1, 0.2, 0.3 },
-        { 0.2, 0.4, 0.1, 0.5 }, { 0.5, 0.1, 0.5, 0.2 } };
-    double[][] prevMatrix2 = new double[][] { { 0.1, 0.2, 0.5, 0.9 },
-        { 0.3, 0.5, 0.2, 0.6 }, { 0.6, 0.8, 0.7, 0.5 } };
-
-    DoubleMatrix[] prevMatrices = new DoubleMatrix[2];
-    prevMatrices[0] = new DenseDoubleMatrix(prevMatrix1);
-    prevMatrices[1] = new DenseDoubleMatrix(prevMatrix2);
-
-    SmallLayeredNeuralNetworkMessage message = new SmallLayeredNeuralNetworkMessage(
-        error, isConverge, matrices, prevMatrices);
-    Configuration conf = new Configuration();
-    String strPath = "/tmp/testReadWriteSmallLayeredNeuralNetworkMessageWithPrev";
-    Path path = new Path(strPath);
-    try {
-      FileSystem fs = FileSystem.get(new URI(strPath), conf);
-      FSDataOutputStream out = fs.create(path);
-      message.write(out);
-      out.close();
-
-      FSDataInputStream in = fs.open(path);
-      SmallLayeredNeuralNetworkMessage readMessage = new SmallLayeredNeuralNetworkMessage(
-          0, isConverge, null, null);
-      readMessage.readFields(in);
-      in.close();
-
-      assertTrue(readMessage.isConverge());
-
-      DoubleMatrix[] readMatrices = readMessage.getCurMatrices();
-      assertEquals(2, readMatrices.length);
-      for (int i = 0; i < readMatrices.length; ++i) {
-        double[][] doubleMatrices = ((DenseDoubleMatrix) readMatrices[i])
-            .getValues();
-        double[][] doubleExpected = ((DenseDoubleMatrix) matrices[i])
-            .getValues();
-        for (int r = 0; r < doubleMatrices.length; ++r) {
-          assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001);
-        }
-      }
-
-      DoubleMatrix[] readPrevMatrices = readMessage.getPrevMatrices();
-      assertEquals(2, readPrevMatrices.length);
-      for (int i = 0; i < readPrevMatrices.length; ++i) {
-        double[][] doubleMatrices = ((DenseDoubleMatrix) readPrevMatrices[i])
-            .getValues();
-        double[][] doubleExpected = ((DenseDoubleMatrix) prevMatrices[i])
-            .getValues();
-        for (int r = 0; r < doubleMatrices.length; ++r) {
-          assertArrayEquals(doubleExpected[r], doubleMatrices[r], 0.000001);
-        }
-      }
-
-      // delete
-      fs.delete(path, true);
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (URISyntaxException e) {
-      e.printStackTrace();
-    }
-  }
-
-}


Mime
View raw message