labs-commits mailing list archives

From tomm...@apache.org
Subject svn commit: r1716717 - in /labs/yay/trunk: api/src/main/java/org/apache/yay/ core/src/main/java/org/apache/yay/core/ core/src/test/java/org/apache/yay/core/ core/src/test/resources/word2vec/
Date Thu, 26 Nov 2015 15:07:42 GMT
Author: tommaso
Date: Thu Nov 26 15:07:41 2015
New Revision: 1716717

URL: http://svn.apache.org/viewvc?rev=1716717&view=rev
Log:
performance improvements, simpler cost function API

Added:
    labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt   (with props)
Modified:
    labs/yay/trunk/api/src/main/java/org/apache/yay/CostFunction.java
    labs/yay/trunk/api/src/main/java/org/apache/yay/DerivativeUpdateFunction.java
    labs/yay/trunk/api/src/main/java/org/apache/yay/TrainingSet.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/CrossEntropyCostFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/LMSCostFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/BackPropagationLearningStrategyTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/FeedForwardStrategyTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/LogisticRegressionCostFunctionTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/VoidLearningStrategyTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/CostFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/CostFunction.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/CostFunction.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/CostFunction.java Thu Nov 26 15:07:41 2015
@@ -35,17 +35,7 @@ public interface CostFunction<T, I, O> {
    * @return a <code>Double</code> cost
    * @throws Exception if any error occurs during the cost calculation
    */
-  Double calculateAggregatedCost(TrainingSet<I, O> trainingExamples,
+  Double calculateCost(TrainingSet<I, O> trainingExamples,
                                  Hypothesis<T, I, O> hypothesis) throws Exception;
 
-  /**
-   * Calculate the cost of one or more {@link org.apache.yay.TrainingExample}s for a given {@link org.apache.yay.Hypothesis}
-   *
-   * @param hypothesis      the hypothesis
-   * @param trainingExamples some training examples
-   * @return a <code>Double</code> cost
-   * @throws Exception if any error occurs during the cost calculation
-   */
-  Double calculateCost(Hypothesis<T, I, O> hypothesis, TrainingExample<I, O>... trainingExamples) throws Exception;
-
 }
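
With the varargs calculateCost(Hypothesis, TrainingExample...) overload removed, callers now drive a cost function with a TrainingSet only. A minimal usage sketch, mirroring the updated tests (samples and neuralNetwork are assumed to be built elsewhere, e.g. via NeuralNetworkFactory):

    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
    LogisticRegressionCostFunction costFunction = new LogisticRegressionCostFunction(0.1d);
    // one call evaluates the aggregated cost of the hypothesis over the whole set
    Double cost = costFunction.calculateCost(trainingSet, neuralNetwork);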

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/DerivativeUpdateFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/DerivativeUpdateFunction.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/DerivativeUpdateFunction.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/DerivativeUpdateFunction.java Thu Nov 26 15:07:41 2015
@@ -19,12 +19,18 @@
 package org.apache.yay;
 
 import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.yay.TrainingSet;
 
 /**
  * Derivatives update function
  */
-public interface DerivativeUpdateFunction<F,O> {
+public interface DerivativeUpdateFunction<F, O> {
 
-  RealMatrix[] updateParameters(RealMatrix[] weights, TrainingSet<F,O> trainingSet);
+  /**
+   * get the updated derivatives
+   *
+   * @param weights     current weights represented as an array of matrices
+   * @param trainingSet the training set
+   * @return an array of matrices representing the updated derivatives for each weight matrix
+   */
+  RealMatrix[] getUpdatedDerivatives(RealMatrix[] weights, TrainingSet<F, O> trainingSet);
 }

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/TrainingSet.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/TrainingSet.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/TrainingSet.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/TrainingSet.java Thu Nov 26 15:07:41 2015
@@ -20,6 +20,7 @@ package org.apache.yay;
 
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.LinkedList;
 
 /**
  * A collection of {@link TrainingExample}s.
@@ -27,6 +28,7 @@ import java.util.Iterator;
 public class TrainingSet<F, O> implements Iterable<TrainingExample<F, O>> {
 
   private final Collection<TrainingExample<F, O>> samples;
+  private Iterator<TrainingExample<F, O>> iterator;
 
   public TrainingSet(Collection<TrainingExample<F, O>> samples) {
     this.samples = samples;
@@ -41,7 +43,26 @@ public class TrainingSet<F, O> implement
     return samples.size();
   }
 
-  public TrainingExample[] toArray() {
-    return samples.toArray(new TrainingExample[size()]);
+  public TrainingSet<F, O> nextBatch(int batch) {
+    return batch > 0 ? getBatch(batch) : this;
+  }
+
+  private TrainingSet<F, O> getBatch(int batch) {
+    Collection<TrainingExample<F, O>> samples = new LinkedList<>();
+    if (iterator == null) {
+      iterator = this.iterator();
+    }
+    for (int i = 0; i < batch; i++) {
+      if (!iterator.hasNext()) {
+        iterator = this.iterator();
+      }
+      samples.add(iterator.next());
+    }
+    return new TrainingSet<F,O>(samples) {
+      @Override
+      public int size() {
+        return samples.size();
+      }
+    };
   }
 }
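
nextBatch(int) replaces the removed toArray() as the way to carve out mini-batches: it keeps a shared iterator across calls and restarts it when the underlying collection is exhausted, so successive calls cycle through the data. A short sketch of the intended call pattern (the batch size 10 is illustrative):

    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
    // returns the next 10 examples, wrapping around at the end of the collection;
    // a non-positive batch size returns the full training set unchanged
    TrainingSet<Double, Double> firstBatch = trainingSet.nextBatch(10);
    TrainingSet<Double, Double> secondBatch = trainingSet.nextBatch(10);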

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java Thu Nov 26 15:07:41 2015
@@ -89,24 +89,15 @@ public class BackPropagationLearningStra
       double cost = Double.MAX_VALUE;
       long start = System.currentTimeMillis();
       while (true) {
-        if (iterations > 0 && iterations % (maxIterations / 1000d) == 0) {
+        if (iterations % (1 + (maxIterations / 100)) == 0) {
           long time = (System.currentTimeMillis() - start) / 1000;
-          if (time / 60 > 2) {
-            System.out.println(iterations + " iterations in " + (time / 60) + " minutes (" + ((double) iterations / time) + " ips)");
+          if (time > 60) {
+            System.out.println("cost is " + cost + " after " + iterations + " iterations in " + (time / 60) + " minutes (" + ((double) iterations / time) + " ips)");
           }
         }
-        TrainingExample<Double, Double>[] miniBatch = batch > 0 ? new TrainingExample[batch] : trainingExamples.toArray();
-        if (batch > 0) {
-          for (int i = 0; i < batch; i++) {
-            if (!iterator.hasNext()) {
-              iterator = trainingExamples.iterator();
-            }
-            miniBatch[i] = iterator.next();
-          }
-        }
-
         // calculate cost
-        double newCost = costFunction.calculateCost(neuralNetwork, miniBatch);
+        TrainingSet<Double, Double> nextBatch = trainingExamples.nextBatch(batch);
+        double newCost = costFunction.calculateCost(nextBatch, neuralNetwork);
 
         if (Double.POSITIVE_INFINITY == newCost || newCost > cost && batch == -1) {
           throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + newCost);
@@ -120,10 +111,8 @@ public class BackPropagationLearningStra
         // update registered cost
         cost = newCost;
 
-        TrainingSet<Double, Double> trainingSet = batch < 0 ? trainingExamples : new TrainingSet<>(Arrays.asList(miniBatch));
-
         // calculate the derivatives to update the parameters
-        RealMatrix[] derivatives = derivativeUpdateFunction.updateParameters(weightsMatrixSet, trainingSet);
+        RealMatrix[] derivatives = derivativeUpdateFunction.getUpdatedDerivatives(weightsMatrixSet, nextBatch);
 
         // calculate the updated parameters
         updatedWeights = updateWeights(updatedWeights, derivatives, alpha);
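
updateWeights(...) itself is not part of this hunk; under the usual gradient-descent reading of the loop it plausibly amounts to subtracting the alpha-scaled derivatives from each weight matrix, along these lines (a sketch, not the committed implementation):

    private RealMatrix[] updateWeights(RealMatrix[] weights, RealMatrix[] derivatives, double alpha) {
      RealMatrix[] updated = new RealMatrix[weights.length];
      for (int i = 0; i < weights.length; i++) {
        // one gradient-descent step per layer: w := w - alpha * dJ/dw
        updated[i] = weights[i].subtract(derivatives[i].scalarMultiply(alpha));
      }
      return updated;
    }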

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/CrossEntropyCostFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/CrossEntropyCostFunction.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/CrossEntropyCostFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/CrossEntropyCostFunction.java Thu Nov 26 15:07:41 2015
@@ -30,21 +30,8 @@ import org.apache.yay.TrainingSet;
  */
 public class CrossEntropyCostFunction implements NeuralNetworkCostFunction {
 
-  @Override
-  public Double calculateAggregatedCost(TrainingSet<Double, Double> trainingSet,
-                                        Hypothesis<RealMatrix, Double, Double> hypothesis) throws Exception {
-    TrainingExample<Double, Double>[] samples = new TrainingExample[trainingSet.size()];
-    int i = 0;
-    for (TrainingExample<Double, Double> sample : trainingSet) {
-      samples[i] = sample;
-      i++;
-    }
-    return calculateCost(hypothesis, samples);
-  }
-
-  @SafeVarargs
-  private final Double calculateErrorTerm(Hypothesis<RealMatrix, Double, Double> hypothesis,
-                                          TrainingExample<Double, Double>... trainingExamples) throws PredictionException {
+  private Double calculateErrorTerm(Hypothesis<RealMatrix, Double, Double> hypothesis,
+                                    TrainingSet<Double, Double> trainingExamples) throws PredictionException {
     Double res = 0d;
 
     for (TrainingExample<Double, Double> input : trainingExamples) {
@@ -59,9 +46,9 @@ public class CrossEntropyCostFunction im
     return res;
   }
 
-  @SafeVarargs
   @Override
-  public final Double calculateCost(Hypothesis<RealMatrix, Double, Double> hypothesis, TrainingExample<Double, Double>... trainingExamples) throws Exception {
-    return calculateErrorTerm(hypothesis, trainingExamples);
+  public final Double calculateCost(TrainingSet<Double, Double> trainingSet,
+                                    Hypothesis<RealMatrix, Double, Double> hypothesis) throws Exception {
+    return calculateErrorTerm(hypothesis, trainingSet);
   }
 }

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java Thu Nov 26 15:07:41 2015
@@ -18,8 +18,9 @@
  */
 package org.apache.yay.core;
 
-import org.apache.commons.math3.linear.ArrayRealVector;
+import org.apache.commons.math3.linear.OpenMapRealVector;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.RealMatrixChangingVisitor;
 import org.apache.commons.math3.linear.RealVector;
 import org.apache.yay.DerivativeUpdateFunction;
 import org.apache.yay.Feature;
@@ -29,7 +30,6 @@ import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ConversionUtils;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 
 /**
@@ -44,13 +44,12 @@ class DefaultDerivativeUpdateFunction im
   }
 
   @Override
-  public RealMatrix[] updateParameters(RealMatrix[] weightsMatrixSet, TrainingSet<Double, Double> trainingExamples) {
+  public RealMatrix[] getUpdatedDerivatives(RealMatrix[] weightsMatrixSet, TrainingSet<Double, Double> trainingExamples) {
     // set up the accumulator matrix(es)
     RealMatrix[] triangle = new RealMatrix[weightsMatrixSet.length];
     RealVector[] deltaVectors = new RealVector[weightsMatrixSet.length];
 
     int noOfMatrixes = weightsMatrixSet.length - 1;
-    double count = 0;
     for (TrainingExample<Double, Double> trainingExample : trainingExamples) {
       try {
         // get activations from feed forward propagation
@@ -62,36 +61,56 @@ class DefaultDerivativeUpdateFunction im
         // calculate output error (corresponding to the last delta^l)
         RealVector nextLayerDelta = calculateOutputError(trainingExample, activations);
 
-        deltaVectors[noOfMatrixes] = nextLayerDelta;
-
-        // back prop the error and update the deltas accordingly
-        for (int l = noOfMatrixes; l > 0; l--) {
-          RealVector currentActivationsVector = activations[l - 1];
-          nextLayerDelta = calculateDeltaVector(weightsMatrixSet[l], currentActivationsVector, nextLayerDelta);
-
-          // collect delta vectors for this example
-          deltaVectors[l - 1] = nextLayerDelta;
-        }
+        updateDeltaVectors(weightsMatrixSet, deltaVectors, noOfMatrixes, activations, nextLayerDelta);
 
         RealVector[] newActivations = new RealVector[activations.length];
         newActivations[0] = ConversionUtils.toRealVector(input);
         System.arraycopy(activations, 0, newActivations, 1, activations.length - 1);
 
-
         // update triangle (big delta matrix)
         updateTriangle(triangle, newActivations, deltaVectors);
       } catch (Exception e) {
         throw new RuntimeException("error during derivatives calculation", e);
       }
-      count++;
     }
 
-    RealMatrix[] derivatives = new RealMatrix[triangle.length];
-    for (int i = 0; i < triangle.length; i++) {
-      // TODO : introduce regularization diversification on bias term (currently not regularized)
-      derivatives[i] = triangle[i].scalarMultiply(1d / count);
+    for (RealMatrix aTriangle : triangle) {
+      aTriangle.walkInOptimizedOrder(new RealMatrixChangingVisitor() {
+        @Override
+        public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
+
+        }
+
+        @Override
+        public double visit(int row, int column, double value) {
+          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
+            return value / trainingExamples.size();
+          } else {
+            return value;
+          }
+        }
+
+        @Override
+        public double end() {
+          return 0;
+        }
+      });
+    }
+    return triangle;
+  }
+
+  private void updateDeltaVectors(RealMatrix[] weightsMatrixSet, RealVector[] deltaVectors, int noOfMatrixes, RealVector[] activations,
+                                  RealVector nextLayerDelta) {
+    deltaVectors[noOfMatrixes] = nextLayerDelta;
+
+    // back prop the error and update the deltas accordingly
+    for (int l = noOfMatrixes; l > 0; l--) {
+      RealVector currentActivationsVector = activations[l - 1];
+      nextLayerDelta = calculateDeltaVector(weightsMatrixSet[l], currentActivationsVector, nextLayerDelta);
+
+      // collect delta vectors for this example
+      deltaVectors[l - 1] = nextLayerDelta;
     }
-    return derivatives;
   }
 
   private void updateTriangle(RealMatrix[] triangle, RealVector[] activations, RealVector[] deltaVectors) {
@@ -107,7 +126,7 @@ class DefaultDerivativeUpdateFunction im
 
   private RealVector calculateDeltaVector(RealMatrix thetaL, RealVector activationsVector, RealVector nextLayerDelta) {
     // TODO : remove the bias term from the error calculations
-    ArrayRealVector identity = new ArrayRealVector(activationsVector.getDimension(), 1d);
+    RealVector identity = new OpenMapRealVector(activationsVector.getDimension(), 1d);
     RealVector gz = activationsVector.ebeMultiply(identity.subtract(activationsVector)); // = a^l .* (1-a^l)
     return thetaL.preMultiply(nextLayerDelta).ebeMultiply(gz);
   }
@@ -116,12 +135,11 @@ class DefaultDerivativeUpdateFunction im
     RealVector output = activations[activations.length - 1];
 
     Double[] actualOutput = trainingExample.getOutput();
-    RealVector learnedOutputRealVector = new ArrayRealVector(actualOutput); // turn example output to a vector
-
-    double[] ones = new double[output.getDimension()];
-    Arrays.fill(ones, 1d);
+    RealVector learnedOutputRealVector = new OpenMapRealVector(actualOutput); // turn example output to a vector
 
     // error calculation -> er_a = out_a * (1 - out_a) * (tgt_a - out_a) (was: output.subtract(learnedOutputRealVector)
-    return output.ebeMultiply(new ArrayRealVector(ones).subtract(output)).ebeMultiply(output.subtract(learnedOutputRealVector));
+    // targetOutputRealVector.subtract(output).map(x -> Math.pow(x, 2)); // squared error
+    // return output.subtract(learnedOutputRealVector);
+    return output.ebeMultiply(new OpenMapRealVector(output.getDimension(), 1d).subtract(output)).ebeMultiply(output.subtract(learnedOutputRealVector));
   }
 }
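
The RealMatrixChangingVisitor pass replaces the old scalarMultiply(1d / count) step, so the averaged gradients are now written into the accumulator matrices in place instead of being copied into freshly allocated ones; ignoring the bias-entry special cases above, it is roughly equivalent to:

    // rough in-place equivalent of the removed averaging step (bias handling omitted)
    for (RealMatrix aTriangle : triangle) {
      aTriangle.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
        @Override
        public double visit(int row, int column, double value) {
          // divide every accumulated entry by the number of examples
          return value / trainingExamples.size();
        }
      });
    }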

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/LMSCostFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/LMSCostFunction.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/LMSCostFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/LMSCostFunction.java Thu Nov 26 15:07:41 2015
@@ -18,7 +18,7 @@
  */
 package org.apache.yay.core;
 
-import org.apache.commons.math3.linear.ArrayRealVector;
+import org.apache.commons.math3.linear.OpenMapRealVector;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.linear.RealVector;
 import org.apache.yay.Hypothesis;
@@ -30,26 +30,16 @@ import org.apache.yay.TrainingSet;
  * Least mean square cost function
  */
 public class LMSCostFunction implements NeuralNetworkCostFunction {
-  @Override
-  public Double calculateAggregatedCost(TrainingSet<Double, Double> trainingExamples, Hypothesis<RealMatrix, Double, Double> hypothesis) throws Exception {
-    TrainingExample<Double, Double>[] samples = new TrainingExample[trainingExamples.size()];
-    int i = 0;
-    for (TrainingExample<Double, Double> sample : trainingExamples) {
-      samples[i] = sample;
-      i++;
-    }
-    return calculateCost(hypothesis, samples);
-  }
 
-  @SafeVarargs
   @Override
-  public final Double calculateCost(Hypothesis<RealMatrix, Double, Double> hypothesis, TrainingExample<Double, Double>... trainingExamples) throws Exception {
+  public final Double calculateCost(TrainingSet<Double, Double> trainingSet,
+                                    Hypothesis<RealMatrix, Double, Double> hypothesis) throws Exception {
     Double cost = 0d;
-    for (TrainingExample<Double, Double> example : trainingExamples) {
+    for (TrainingExample<Double, Double> example : trainingSet) {
       Double[] actualOutput = example.getOutput();
       Double[] predictedOutput = hypothesis.predict(example);
-      RealVector actualVector = new ArrayRealVector(actualOutput);
-      RealVector predictedVector = new ArrayRealVector(predictedOutput);
+      RealVector actualVector = new OpenMapRealVector(actualOutput);
+      RealVector predictedVector = new OpenMapRealVector(predictedOutput);
       RealVector diffVector = actualVector.subtract(predictedVector);
       for (int i = 0; i < diffVector.getDimension(); i++) {
         double entry = diffVector.getEntry(i);

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java Thu Nov 26 15:07:41 2015
@@ -42,21 +42,8 @@ public class LogisticRegressionCostFunct
     this.lambda = DEFAULT_LAMBDA;
   }
 
-  @Override
-  public Double calculateAggregatedCost(TrainingSet<Double, Double> trainingSet,
-                                        Hypothesis<RealMatrix, Double, Double> hypothesis) throws Exception {
-    TrainingExample<Double, Double>[] samples = new TrainingExample[trainingSet.size()];
-    int i = 0;
-    for (TrainingExample<Double, Double> sample : trainingSet) {
-      samples[i] = sample;
-      i++;
-    }
-    return calculateCost(hypothesis, samples);
-  }
-
-  @SafeVarargs
-  private final Double calculateRegularizationTerm(Hypothesis<RealMatrix, Double, Double> hypothesis,
-                                                   TrainingExample<Double, Double>... trainingExamples) {
+  private Double calculateRegularizationTerm(Hypothesis<RealMatrix, Double, Double> hypothesis,
+                                                   TrainingSet<Double, Double> trainingExamples) {
     Double res = 1d;
     for (RealMatrix layerMatrix : hypothesis.getParameters()) {
       res += layerMatrix.walkInOptimizedOrder(new RealMatrixPreservingVisitor() {
@@ -88,12 +75,11 @@ public class LogisticRegressionCostFunct
 //        }
 //      }
     }
-    return (lambda / (2d * trainingExamples.length)) * res;
+    return (lambda / (2d * trainingExamples.size())) * res;
   }
 
-  @SafeVarargs
-  private final Double calculateErrorTerm(Hypothesis<RealMatrix, Double, Double> hypothesis,
-                                          TrainingExample<Double, Double>... trainingExamples) throws PredictionException {
+  private Double calculateErrorTerm(Hypothesis<RealMatrix, Double, Double> hypothesis,
+                                          TrainingSet<Double, Double> trainingExamples) throws PredictionException {
     Double res = 0d;
 
     for (TrainingExample<Double, Double> input : trainingExamples) {
@@ -106,12 +92,11 @@ public class LogisticRegressionCostFunct
                 * Math.log(1d - po);
       }
     }
-    return (-1d / trainingExamples.length) * res;
+    return (-1d / trainingExamples.size()) * res;
   }
 
-  @SafeVarargs
   @Override
-  public final Double calculateCost(Hypothesis<RealMatrix, Double, Double> hypothesis, TrainingExample<Double, Double>... trainingExamples) throws Exception {
+  public final Double calculateCost(TrainingSet<Double, Double> trainingExamples, Hypothesis<RealMatrix, Double, Double> hypothesis) throws Exception {
     Double errorTerm = calculateErrorTerm(hypothesis, trainingExamples);
     Double regularizationTerm = calculateRegularizationTerm(hypothesis, trainingExamples);
     return errorTerm + regularizationTerm;
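
For reference, the two terms combined here form the standard regularized logistic loss over the m = trainingExamples.size() examples (up to the commit's existing handling of bias terms):

    J(theta) = (-1/m) * sum_i [ y_i * log(h_theta(x_i)) + (1 - y_i) * log(1 - h_theta(x_i)) ]
               + (lambda / (2m)) * sum_j theta_j^2

The only behavioural change in this hunk is that m is now taken from TrainingSet.size() instead of the length of the removed varargs array.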

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/BackPropagationLearningStrategyTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/BackPropagationLearningStrategyTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/BackPropagationLearningStrategyTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/BackPropagationLearningStrategyTest.java Thu Nov 26 15:07:41 2015
@@ -44,7 +44,7 @@ public class BackPropagationLearningStra
     RealMatrix[] initialWeights = createRandomWeights(2);
 
     Collection<TrainingExample<Double, Double>> samples = createSamples(1000, initialWeights[0].getColumnDimension() - 1, 2);
-    TrainingSet<Double, Double> trainingSet = new TrainingSet<Double, Double>(samples);
+    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
     RealMatrix[] learntWeights = backPropagationLearningStrategy.learnWeights(initialWeights, trainingSet);
     assertNotNull(learntWeights);
 
@@ -65,7 +65,7 @@ public class BackPropagationLearningStra
     RealMatrix[] initialWeights = createRandomWeights(10);
 
     Collection<TrainingExample<Double, Double>> samples = createSamples(1000, initialWeights[0].getColumnDimension() - 1, 10);
-    TrainingSet<Double, Double> trainingSet = new TrainingSet<Double, Double>(samples);
+    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
     RealMatrix[] learntWeights = backPropagationLearningStrategy.learnWeights(initialWeights, trainingSet);
     assertNotNull(learntWeights);
 
@@ -139,7 +139,7 @@ public class BackPropagationLearningStra
     initialWeights[2] = new Array2DRowRealMatrix(new double[][]{{1d, 2d, 0.3d, 0.5d}}); // 1 x 4
 
     Collection<TrainingExample<Double, Double>> samples = createSamples(100, 2, 1);
-    TrainingSet<Double, Double> trainingSet = new TrainingSet<Double, Double>(samples);
+    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
     RealMatrix[] learntWeights = backPropagationLearningStrategy.learnWeights(initialWeights, trainingSet);
     assertNotNull(learntWeights);
 
@@ -163,7 +163,7 @@ public class BackPropagationLearningStra
   public void testLearningWithRandomSamples() throws Exception {
     PredictionStrategy<Double, Double> predictionStrategy = new FeedForwardStrategy(new SigmoidFunction());
     BackPropagationLearningStrategy backPropagationLearningStrategy =
-            new BackPropagationLearningStrategy(0.1d, 0.0003d, predictionStrategy, new LogisticRegressionCostFunction(0.5d));
+            new BackPropagationLearningStrategy(0.001d, 0.0003d, predictionStrategy, new LogisticRegressionCostFunction(0.5d));
 
     // 3 input units, 3 hidden units, 4 hidden units, 1 output unit
     RealMatrix[] initialWeights = new RealMatrix[3];
@@ -172,7 +172,7 @@ public class BackPropagationLearningStra
     initialWeights[2] = new Array2DRowRealMatrix(new double[][]{{1d, 2d, 0.3d, 0.5d}});
 
     Collection<TrainingExample<Double, Double>> samples = createSamples(10000, 2, 1);
-    TrainingSet<Double, Double> trainingSet = new TrainingSet<Double, Double>(samples);
+    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
     RealMatrix[] learntWeights = backPropagationLearningStrategy.learnWeights(initialWeights, trainingSet);
     assertNotNull(learntWeights);
 
@@ -204,7 +204,7 @@ public class BackPropagationLearningStra
     });
 
     Collection<TrainingExample<Double, Double>> samples = createSamples(1000000, 2, 1);
-    TrainingSet<Double, Double> trainingSet = new TrainingSet<Double, Double>(samples);
+    TrainingSet<Double, Double> trainingSet = new TrainingSet<>(samples);
     RealMatrix[] learntWeights = backPropagationLearningStrategy.learnWeights(initialWeights, trainingSet);
     assertNotNull(learntWeights);
 
@@ -214,7 +214,7 @@ public class BackPropagationLearningStra
   }
 
   private Collection<TrainingExample<Double, Double>> createSamples(int size, int noOfFeatures, int noOfOutputs) {
-    Collection<TrainingExample<Double, Double>> trainingExamples = new ArrayList<TrainingExample<Double, Double>>(size);
+    Collection<TrainingExample<Double, Double>> trainingExamples = new ArrayList<>(size);
     for (int i = 0; i < size; i++) {
       Double[] featureValues = new Double[noOfFeatures];
       for (int j = 0; j < noOfFeatures; j++) {

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java Thu Nov 26 15:07:41 2015
@@ -41,14 +41,14 @@ public class BasicPerceptronTest {
 
   @Before
   public void setUp() throws Exception {
-    Collection<TrainingExample<Double, Double>> samples = new LinkedList<TrainingExample<Double, Double>>();
+    Collection<TrainingExample<Double, Double>> samples = new LinkedList<>();
     samples.add(createTrainingExample(1d, 4d, 5d, 6d));
     samples.add(createTrainingExample(1d, 5d, 6d, 0.5d));
     samples.add(createTrainingExample(0.1d, 9d, 4d, 1.9d));
     samples.add(createTrainingExample(0.11d, 4d, 2.6d, 9.5d));
-    smallDataset = new TrainingSet<Double, Double>(samples);
+    smallDataset = new TrainingSet<>(samples);
 
-    Collection<TrainingExample<Double, Double>> samples1 = new LinkedList<TrainingExample<Double, Double>>();
+    Collection<TrainingExample<Double, Double>> samples1 = new LinkedList<>();
     Random r = new Random();
     for (int i = 0; i < 100000; i++) {
       samples1.add(createTrainingExample(1d, r.nextDouble(),
@@ -58,7 +58,7 @@ public class BasicPerceptronTest {
               r.nextDouble(), r.nextDouble(), r.nextDouble(), r.nextDouble(),
               r.nextDouble(), r.nextDouble()));
     }
-    bigDataset = new TrainingSet<Double, Double>(samples1);
+    bigDataset = new TrainingSet<>(samples1);
 
   }
 
@@ -144,12 +144,12 @@ public class BasicPerceptronTest {
     return new TrainingExample<Double, Double>() {
       @Override
       public ArrayList<Feature<Double>> getFeatures() {
-        ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
-        Feature<Double> byasFeature = new Feature<Double>();
+        ArrayList<Feature<Double>> features = new ArrayList<>();
+        Feature<Double> byasFeature = new Feature<>();
         byasFeature.setValue(1d);
         features.add(byasFeature);
         for (Double d : params) {
-          Feature<Double> feature = new Feature<Double>();
+          Feature<Double> feature = new Feature<>();
           feature.setValue(d);
           features.add(feature);
         }

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/FeedForwardStrategyTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/FeedForwardStrategyTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/FeedForwardStrategyTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/FeedForwardStrategyTest.java Thu Nov 26 15:07:41 2015
@@ -39,7 +39,7 @@ public class FeedForwardStrategyTest {
     weights[0] = new Array2DRowRealMatrix(new double[][]{{1d, 1d, 2d, 3d}, {1d, 1d, 2d, 3d}, {1d, 1d, 2d, 3d}});
     weights[1] = new Array2DRowRealMatrix(new double[][]{{1d, 2d, 3d}});
 
-    Collection<Double> inputs = new LinkedList<Double>();
+    Collection<Double> inputs = new LinkedList<>();
     inputs.add(1d);
     inputs.add(2d);
     inputs.add(-5d);

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/LogisticRegressionCostFunctionTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/LogisticRegressionCostFunctionTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/LogisticRegressionCostFunctionTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/LogisticRegressionCostFunctionTest.java Thu Nov 26 15:07:41 2015
@@ -44,7 +44,7 @@ public class LogisticRegressionCostFunct
   public void setUp() throws Exception {
 
     costFunction = new LogisticRegressionCostFunction(0.1d);
-    Collection<TrainingExample<Double, Double>> trainingExamples = new LinkedList<TrainingExample<Double, Double>>();
+    Collection<TrainingExample<Double, Double>> trainingExamples = new LinkedList<>();
     TrainingExample<Double, Double> example1 = ExamplesFactory.createDoubleTrainingExample(1d, 0d, 1d);
     TrainingExample<Double, Double> example2 = ExamplesFactory.createDoubleTrainingExample(1d, 1d, 1d);
     TrainingExample<Double, Double> example3 = ExamplesFactory.createDoubleTrainingExample(0d, 1d, 1d);
@@ -53,7 +53,7 @@ public class LogisticRegressionCostFunct
     trainingExamples.add(example2);
     trainingExamples.add(example3);
     trainingExamples.add(example4);
-    trainingSet = new TrainingSet<Double, Double>(trainingExamples);
+    trainingSet = new TrainingSet<>(trainingExamples);
 
   }
 
@@ -64,9 +64,9 @@ public class LogisticRegressionCostFunct
     final RealMatrix[] orWeightsMatrixSet = new RealMatrix[]{singleOrLayerWeights};
 
     final NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(orWeightsMatrixSet,
-            new VoidLearningStrategy<Double, Double>(), new FeedForwardStrategy(new SigmoidFunction()));
+            new VoidLearningStrategy<>(), new FeedForwardStrategy(new SigmoidFunction()));
 
-    Double cost = costFunction.calculateAggregatedCost(trainingSet, neuralNetwork);
+    Double cost = costFunction.calculateCost(trainingSet, neuralNetwork);
     assertTrue("cost should not be negative", cost > 0d);
   }
 

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java Thu Nov 26 15:07:41 2015
@@ -43,11 +43,11 @@ public class NeuralNetworkIntegrationTes
     double[][] weights = {{-30d, 20d, 20d}};
     RealMatrix singleAndLayerWeights = new Array2DRowRealMatrix(weights);
     RealMatrix[] andRealMatrixSet = new RealMatrix[]{singleAndLayerWeights};
-    NeuralNetwork andNN = createNN(andRealMatrixSet, new VoidLearningStrategy<Double, Double>());
-    assertEquals(0l, Math.round(andNN.predict(createSample(1d, 0d))[0]));
-    assertEquals(0l, Math.round(andNN.predict(createSample(0d, 1d))[0]));
-    assertEquals(0l, Math.round(andNN.predict(createSample(0d, 0d))[0]));
-    assertEquals(1l, Math.round(andNN.predict(createSample(1d, 1d))[0]));
+    NeuralNetwork andNN = createNN(andRealMatrixSet, new VoidLearningStrategy<>());
+    assertEquals(0L, Math.round(andNN.predict(createSample(1d, 0d))[0]));
+    assertEquals(0L, Math.round(andNN.predict(createSample(0d, 1d))[0]));
+    assertEquals(0L, Math.round(andNN.predict(createSample(0d, 0d))[0]));
+    assertEquals(1L, Math.round(andNN.predict(createSample(1d, 1d))[0]));
   }
 
   @Test
@@ -55,11 +55,11 @@ public class NeuralNetworkIntegrationTes
     double[][] weights = {{-10d, 20d, 20d}};
     RealMatrix singleOrLayerWeights = new Array2DRowRealMatrix(weights);
     RealMatrix[] orRealMatrixSet = new RealMatrix[]{singleOrLayerWeights};
-    NeuralNetwork orNN = createNN(orRealMatrixSet, new VoidLearningStrategy<Double, Double>());
-    assertEquals(1l, Math.round(orNN.predict(createSample(1d, 0d))[0]));
-    assertEquals(1l, Math.round(orNN.predict(createSample(0d, 1d))[0]));
-    assertEquals(0l, Math.round(orNN.predict(createSample(0d, 0d))[0]));
-    assertEquals(1l, Math.round(orNN.predict(createSample(1d, 1d))[0]));
+    NeuralNetwork orNN = createNN(orRealMatrixSet, new VoidLearningStrategy<>());
+    assertEquals(1L, Math.round(orNN.predict(createSample(1d, 0d))[0]));
+    assertEquals(1L, Math.round(orNN.predict(createSample(0d, 1d))[0]));
+    assertEquals(0L, Math.round(orNN.predict(createSample(0d, 0d))[0]));
+    assertEquals(1L, Math.round(orNN.predict(createSample(1d, 1d))[0]));
   }
 
   @Test
@@ -67,9 +67,9 @@ public class NeuralNetworkIntegrationTes
     double[][] weights = {{10d, -20d}};
     RealMatrix singleNotLayerWeights = new Array2DRowRealMatrix(weights);
     RealMatrix[] notRealMatrixSet = new RealMatrix[]{singleNotLayerWeights};
-    NeuralNetwork orNN = createNN(notRealMatrixSet, new VoidLearningStrategy<Double, Double>());
-    assertEquals(1l, Math.round(orNN.predict(createSample(0d))[0]));
-    assertEquals(0l, Math.round(orNN.predict(createSample(1d))[0]));
+    NeuralNetwork orNN = createNN(notRealMatrixSet, new VoidLearningStrategy<>());
+    assertEquals(1L, Math.round(orNN.predict(createSample(0d))[0]));
+    assertEquals(0L, Math.round(orNN.predict(createSample(1d))[0]));
   }
 
   @Test
@@ -77,11 +77,11 @@ public class NeuralNetworkIntegrationTes
     RealMatrix firstNorLayerWeights = new Array2DRowRealMatrix(new double[][]{{0, 0, 0}, {-30d, 20d, 20d}, {10d, -20d, -20d}});
     RealMatrix secondNorLayerWeights = new Array2DRowRealMatrix(new double[][]{{-10d, 20d, 20d}});
     RealMatrix[] norRealMatrixSet = new RealMatrix[]{firstNorLayerWeights, secondNorLayerWeights};
-    NeuralNetwork norNN = createNN(norRealMatrixSet, new VoidLearningStrategy<Double, Double>());
-    assertEquals(0l, Math.round(norNN.predict(createSample(1d, 0d))[0]));
-    assertEquals(0l, Math.round(norNN.predict(createSample(0d, 1d))[0]));
-    assertEquals(1l, Math.round(norNN.predict(createSample(0d, 0d))[0]));
-    assertEquals(1l, Math.round(norNN.predict(createSample(1d, 1d))[0]));
+    NeuralNetwork norNN = createNN(norRealMatrixSet, new VoidLearningStrategy<>());
+    assertEquals(0L, Math.round(norNN.predict(createSample(1d, 0d))[0]));
+    assertEquals(0L, Math.round(norNN.predict(createSample(0d, 1d))[0]));
+    assertEquals(1L, Math.round(norNN.predict(createSample(0d, 0d))[0]));
+    assertEquals(1L, Math.round(norNN.predict(createSample(1d, 1d))[0]));
   }
 
   @Test
@@ -91,10 +91,10 @@ public class NeuralNetworkIntegrationTes
 
     RealMatrix[] RealMatrixes = new RealMatrix[]{firstLayer, secondLayer};
 
-    NeuralNetwork neuralNetwork = createNN(RealMatrixes, new VoidLearningStrategy<Double, Double>());
+    NeuralNetwork neuralNetwork = createNN(RealMatrixes, new VoidLearningStrategy<>());
 
     Double prdictedValue = neuralNetwork.predict(createSample(5d, 6d, 7d))[0];
-    assertEquals(1l, Math.round(prdictedValue));
+    assertEquals(1L, Math.round(prdictedValue));
     assertEquals(Double.valueOf(0.9975273768433653d), prdictedValue);
   }
 
@@ -104,20 +104,17 @@ public class NeuralNetworkIntegrationTes
   }
 
   private Input<Double> createSample(final Double... params) {
-    return new Input<Double>() {
-      @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
-        Feature<Double> byasFeature = new Feature<Double>();
-        byasFeature.setValue(1d);
-        features.add(byasFeature);
-        for (Double d : params) {
-          Feature<Double> feature = new Feature<Double>();
-          feature.setValue(d);
-          features.add(feature);
-        }
-        return features;
+    return () -> {
+      ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
+      Feature<Double> byasFeature = new Feature<Double>();
+      byasFeature.setValue(1d);
+      features.add(byasFeature);
+      for (Double d : params) {
+        Feature<Double> feature = new Feature<Double>();
+        feature.setValue(d);
+        features.add(feature);
       }
+      return features;
     };
   }
 
@@ -128,7 +125,7 @@ public class NeuralNetworkIntegrationTes
     NeuralNetwork nn = createNN(randomWeights, new BackPropagationLearningStrategy());
     int noOfFeatures = randomWeights[0].getColumnDimension() - 1;
     Collection<TrainingExample<Double, Double>> samples = createSamples(1000000, noOfFeatures, noOfOutputs);
-    nn.learn(new TrainingSet<Double, Double>(samples));
+    nn.learn(new TrainingSet<>(samples));
     DistanceMeasure distanceMeasure = new CanberraDistance();
     for (TrainingExample<Double, Double> sample : samples) {
       Double[] predictedOutput = nn.predict(sample);
@@ -149,7 +146,7 @@ public class NeuralNetworkIntegrationTes
 
   private Collection<TrainingExample<Double, Double>> createSamples(int size, int noOfFeatures, int noOfOutputs) {
     Random r = new Random();
-    Collection<TrainingExample<Double, Double>> trainingExamples = new ArrayList<TrainingExample<Double, Double>>(size);
+    Collection<TrainingExample<Double, Double>> trainingExamples = new ArrayList<>(size);
     for (int i = 0; i < size; i++) {
       Double[] featureValues = new Double[noOfFeatures];
       for (int j = 0; j < noOfFeatures; j++) {

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/VoidLearningStrategyTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/VoidLearningStrategyTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/VoidLearningStrategyTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/VoidLearningStrategyTest.java Thu Nov 26 15:07:41 2015
@@ -37,9 +37,9 @@ public class VoidLearningStrategyTest {
 
   @Test
   public void testNoLearning() throws Exception {
-    VoidLearningStrategy<String, String> learningStrategy = new VoidLearningStrategy<String, String>();
-    Collection<TrainingExample<String, String>> trainingExamples = new LinkedList<TrainingExample<String, String>>();
-    TrainingSet<String, String> trainingSet = new TrainingSet<String, String>(trainingExamples);
+    VoidLearningStrategy<String, String> learningStrategy = new VoidLearningStrategy<>();
+    Collection<TrainingExample<String, String>> trainingExamples = new LinkedList<>();
+    TrainingSet<String, String> trainingSet = new TrainingSet<>(trainingExamples);
     RealMatrix[] weightsMatrixSet = new RealMatrix[1];
     double[][] weights = {{1d, 2d,}, {2d, 4d}};
     weightsMatrixSet[0] = new Array2DRowRealMatrix(weights);

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java?rev=1716717&r1=1716716&r2=1716717&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java Thu Nov 26 15:07:41 2015
@@ -21,13 +21,9 @@ package org.apache.yay.core;
 import com.google.common.base.Splitter;
 import org.apache.commons.math3.linear.MatrixUtils;
 import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.commons.math3.ml.distance.CanberraDistance;
-import org.apache.commons.math3.ml.distance.ChebyshevDistance;
 import org.apache.commons.math3.ml.distance.DistanceMeasure;
-import org.apache.commons.math3.ml.distance.EarthMoversDistance;
 import org.apache.commons.math3.ml.distance.EuclideanDistance;
-import org.apache.commons.math3.ml.distance.ManhattanDistance;
-import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
+import org.apache.commons.math3.util.FastMath;
 import org.apache.yay.ActivationFunction;
 import org.apache.yay.Feature;
 import org.apache.yay.NeuralNetwork;
@@ -101,86 +97,86 @@ public class WordVectorsTest {
     System.out.println("initializing neural network");
     RealMatrix[] randomWeights = createRandomWeights(inputSize, hiddenSize, outputSize);
 
-    Map<Integer, ActivationFunction<Double>> activationFunctions = new HashMap<Integer, ActivationFunction<Double>>();
-    activationFunctions.put(0, new IdentityActivationFunction<Double>());
+    Map<Integer, ActivationFunction<Double>> activationFunctions = new HashMap<>();
+    activationFunctions.put(0, new IdentityActivationFunction<>());
     activationFunctions.put(1, new SoftmaxActivationFunction());
     FeedForwardStrategy predictionStrategy = new FeedForwardStrategy(activationFunctions);
-    BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(0.001d, 1,
+    BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(0.000004d, 1,
             BackPropagationLearningStrategy.DEFAULT_THRESHOLD, predictionStrategy, new CrossEntropyCostFunction(),
             trainingSet.size());
     NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(randomWeights, learningStrategy, predictionStrategy);
 
     System.out.println("learning...");
+    long start = System.currentTimeMillis();
     RealMatrix[] learnedWeights = neuralNetwork.learn(trainingSet);
+    System.out.println("learning finished in " + (System.currentTimeMillis() - start) / 60000 + " minutes");
 
-    System.out.println("learning finished");
     RealMatrix wordVectors = learnedWeights[0];
 
     assertNotNull(wordVectors);
 
     if (serialize) {
-      System.out.println("serializing word vectors");
-      BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/sg-vectors.csv")));
-      for (int i = 1; i < wordVectors.getColumnDimension(); i++) {
-        double[] a = wordVectors.getColumnVector(i).toArray();
-        String csq = Arrays.toString(Arrays.copyOfRange(a, 1, a.length));
-        csq = csq.substring(1, csq.length() - 1);
-        bufferedWriter.append(csq);
-        bufferedWriter.append(",");
-        bufferedWriter.append(vocabulary.get(i - 1));
-        bufferedWriter.newLine();
-      }
-      bufferedWriter.flush();
-      bufferedWriter.close();
+      serialize(vocabulary, wordVectors);
     }
 
     if (measure) {
-      System.out.println("measuring similarities");
-      Collection<DistanceMeasure> measures = new LinkedList<DistanceMeasure>();
-      measures.add(new EuclideanDistance());
-      measures.add(new CanberraDistance());
-      measures.add(new ChebyshevDistance());
-      measures.add(new ManhattanDistance());
-      measures.add(new EarthMoversDistance());
-      measures.add(new DistanceMeasure() {
-        private final PearsonsCorrelation pearsonsCorrelation = new PearsonsCorrelation();
-
-        @Override
-        public double compute(double[] a, double[] b) {
-          return 1 / pearsonsCorrelation.correlation(a, b);
-        }
-
-        @Override
-        public String toString() {
-          return "inverse pearson correlation distance measure";
-        }
-      });
-      measures.add(new DistanceMeasure() {
-        @Override
-        public double compute(double[] a, double[] b) {
-          double dp = 0.0;
-          double na = 0.0;
-          double nb = 0.0;
-          for (int i = 0; i < a.length; i++) {
-            dp += a[i] * b[i];
-            na += Math.pow(a[i], 2);
-            nb += Math.pow(b[i], 2);
-          }
-          double cosineSimilarity = dp / (Math.sqrt(na) * Math.sqrt(nb));
-          return 1 / cosineSimilarity;
-        }
+      measure(vocabulary, wordVectors);
+    }
+  }
 
-        @Override
-        public String toString() {
-          return "inverse cosine similarity distance measure";
+  private void measure(List<String> vocabulary, RealMatrix wordVectors) {
+    System.out.println("measuring similarities");
+    Collection<DistanceMeasure> measures = new LinkedList<>();
+    measures.add(new EuclideanDistance());
+    measures.add(new DistanceMeasure() {
+      @Override
+      public double compute(double[] a, double[] b) {
+        double dp = 0.0;
+        double na = 0.0;
+        double nb = 0.0;
+        for (int i = 0; i < a.length; i++) {
+          dp += a[i] * b[i];
+          na += Math.pow(a[i], 2);
+          nb += Math.pow(b[i], 2);
         }
-      });
+        double cosineSimilarity = dp / (Math.sqrt(na) * Math.sqrt(nb));
+        return 1 / cosineSimilarity;
+      }
 
-      for (DistanceMeasure distanceMeasure : measures) {
-        System.out.println("computing similarity using " + distanceMeasure);
-        computeSimilarities(vocabulary, wordVectors, distanceMeasure);
+      @Override
+      public String toString() {
+        return "inverse cosine similarity distance measure";
       }
+    });
+    measures.add((DistanceMeasure) (a, b) -> {
+      double da = FastMath.sqrt(MatrixUtils.createRealVector(a).dotProduct(MatrixUtils.createRealVector(a)));
+      double db = FastMath.sqrt(MatrixUtils.createRealVector(b).dotProduct(MatrixUtils.createRealVector(b)));
+      return Math.abs(db - da);
+    });
+    for (DistanceMeasure distanceMeasure : measures) {
+      System.out.println("computing similarity using " + distanceMeasure);
+      computeSimilarities(vocabulary, wordVectors, distanceMeasure);
+    }
+  }
+
+  private void serialize(List<String> vocabulary, RealMatrix wordVectors) throws IOException {
+    System.out.println("serializing word vectors");
+    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/sg-vectors.csv")));
+    for (int i = 1; i < wordVectors.getColumnDimension(); i++) {
+      double[] a = wordVectors.getColumnVector(i).toArray();
+      String csq = Arrays.toString(Arrays.copyOfRange(a, 1, a.length));
+      csq = csq.substring(1, csq.length() - 1);
+      bufferedWriter.append(csq);
+      bufferedWriter.append(", ");
+      bufferedWriter.append(vocabulary.get(i - 1));
+      bufferedWriter.newLine();
     }
+    bufferedWriter.flush();
+    bufferedWriter.close();
+
+    // for post processing with dimensionality reduction (PCA, t-SNE, etc.):
+    // values: awk '{$hiddenSize=""; print $0}' target/sg-vectors.csv
+    // keys: awk '{print $hiddenSize}' target/sg-vectors.csv
   }
 
   private void computeSimilarities(List<String> vocabulary, RealMatrix wordVectors, DistanceMeasure distanceMeasure) {
@@ -197,7 +193,7 @@ public class WordVectorsTest {
         if (i != j) {
           double[] vector = wordVectors.getColumn(j);
           vector = Arrays.copyOfRange(vector, 1, vector.length);
-          double similarity = 1 / distanceMeasure.compute(subjectVector, vector);
+          double similarity = 1d / distanceMeasure.compute(subjectVector, vector);
           if (similarity > maxSimilarity) {
             maxSimilarity2 = maxSimilarity1;
             j2 = j1;
@@ -220,7 +216,7 @@ public class WordVectorsTest {
         }
       }
       if (i > 0 && j0 > 0 && j1 > 0 && j2 > 0) {
-        System.out.println(vocabulary.get(i - 1) + " is similar to "
+        System.out.println(vocabulary.get(i - 1) + " -> "
                 + vocabulary.get(j0 - 1) + ", "
                 + vocabulary.get(j1 - 1) + ", "
                 + vocabulary.get(j2 - 1));
@@ -280,18 +276,17 @@ public class WordVectorsTest {
 
   private List<String> getVocabulary(Path path) throws IOException {
     long start = System.currentTimeMillis();
-    Set<String> vocabulary = new HashSet<String>();
-    SeekableByteChannel sbc = Files.newByteChannel(path);
+    Set<String> vocabulary = new HashSet<>();
     ByteBuffer buf = ByteBuffer.allocate(100);
-    try {
+    try (SeekableByteChannel sbc = Files.newByteChannel(path)) {
 
       String encoding = System.getProperty("file.encoding");
       StringBuilder previous = new StringBuilder();
-      Splitter splitter = Splitter.on(Pattern.compile("[\\n\\s]")).omitEmptyStrings().trimResults();
+      Splitter splitter = Splitter.on(Pattern.compile("[\\\n\\s]")).omitEmptyStrings().trimResults();
       while (sbc.read(buf) > 0) {
         buf.rewind();
         CharBuffer charBuffer = Charset.forName(encoding).decode(buf);
-        String string = charBuffer.toString();
+        String string = cleanString(charBuffer);
         List<String> split = splitter.splitToList(string);
         int splitSize = split.size();
         if (splitSize > 1) {
@@ -310,7 +305,6 @@ public class WordVectorsTest {
     } catch (IOException x) {
       System.err.println("caught exception: " + x);
     } finally {
-      sbc.close();
       buf.clear();
     }
     long end = System.currentTimeMillis();
@@ -320,9 +314,14 @@ public class WordVectorsTest {
     return list;
   }
 
+  private String cleanString(CharBuffer charBuffer) {
+    String s = charBuffer.toString();
+    return s.toLowerCase().replaceAll("\\.", " ").replaceAll("\\;", " ").replaceAll("\\,", " ").replaceAll("\\:", " ").replaceAll("\\-","").replaceAll("\\\"","");
+  }
+
   private List<String> getVocabulary(Collection<byte[]> sentences) {
     long start = System.currentTimeMillis();
-    List<String> vocabulary = new LinkedList<String>();
+    List<String> vocabulary = new LinkedList<>();
     for (byte[] sentence : sentences) {
       for (String token : new String(sentence).split(" ")) {
         if (!vocabulary.contains(token)) {
@@ -339,31 +338,28 @@ public class WordVectorsTest {
 
   private Queue<List<byte[]>> getFragments(Path path, int w) throws IOException {
     long start = System.currentTimeMillis();
-    Queue<List<byte[]>> fragments = new ConcurrentLinkedDeque<List<byte[]>>();
+    Queue<List<byte[]>> fragments = new ConcurrentLinkedDeque<>();
 
-    SeekableByteChannel sbc = Files.newByteChannel(path);
     ByteBuffer buf = ByteBuffer.allocate(100);
-    try {
+    try (SeekableByteChannel sbc = Files.newByteChannel(path)) {
 
       String encoding = System.getProperty("file.encoding");
       StringBuilder previous = new StringBuilder();
       Splitter splitter = Splitter.on(Pattern.compile("[\\n\\s]")).omitEmptyStrings().trimResults();
-      int lastConsumedIndex = -1;
       while (sbc.read(buf) > 0) {
         buf.rewind();
         CharBuffer charBuffer = Charset.forName(encoding).decode(buf);
-        String string = charBuffer.toString();
+        String string = cleanString(charBuffer);
         List<String> split = splitter.splitToList(string);
         int splitSize = split.size();
         if (splitSize > w) {
           for (int j = 0; j < splitSize - w; j++) {
-            List<byte[]> fragment = new ArrayList<byte[]>(w);
+            List<byte[]> fragment = new ArrayList<>(w);
             fragment.add(previous.append(split.get(j)).toString().getBytes());
             for (int i = 1; i < w; i++) {
               fragment.add(split.get(i + j).getBytes());
             }
             // TODO : this has to be used to re-use the tokens that have not been consumed in next iteration
-            lastConsumedIndex = j + w;
             fragments.add(fragment);
             previous = new StringBuilder();
           }
@@ -376,7 +372,6 @@ public class WordVectorsTest {
     } catch (IOException x) {
       System.err.println("caught exception: " + x);
     } finally {
-      sbc.close();
       buf.clear();
     }
     long end = System.currentTimeMillis();
@@ -385,13 +380,13 @@ public class WordVectorsTest {
   }
 
   private Collection<String> getSentences() throws IOException {
-    Collection<String> sentences = new LinkedList<String>();
+    Collection<String> sentences = new LinkedList<>();
 
     InputStream resourceAsStream = getClass().getResourceAsStream("/word2vec/test.txt");
     BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceAsStream));
     String line;
     while ((line = bufferedReader.readLine()) != null) {
-      String cleanLine = line.toLowerCase().replaceAll("\\.", "").replaceAll("\\;", "").replaceAll("\\,", "").replaceAll("\\:", "");
+      String cleanLine = line.toLowerCase().replaceAll("\\.", " ").replaceAll(";", " ").replaceAll(",", " ").replaceAll(":", " ").replaceAll("-","");
       sentences.add(cleanLine);
     }
     return sentences;

Added: labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt?rev=1716717&view=auto
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt (added)
+++ labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt Thu Nov 26 15:07:41 2015
@@ -0,0 +1,25 @@
+A calculus which combined the flexible geometric structure of vector models with the crisp efficiency of Boolean logic would be extremely beneficial for modelling natural language. With this goal in mind, we present a formulation for logical connectives in vector spaces based on standard linear algebra, giving examples of the use of vector negation to discriminate between different senses of ambiguous words. It turns out that the operators developed in this way are precisely the connectives of quantum logic (Birkhoff and von Neumann, 1936), which to our knowledge have not been exploited before in natural language processing. In quantum logic, arbitrary sets are replaced by linear subspaces of a vector space, and set unions, intersections and complements are replaced by vector sum, intersection and orthogonal complements of subspaces. We demonstrate that these logical connectives (particularly the orthogonal complement for negation) are powerful tools for exploring and analysing word meanings and show distinct advantages over Boolean operators in document retrieval experiments.
+This paper is organised as follows. In Section 1.1 we describe some of the ways vectors have been used to represent the meanings of terms and documents in natural language processing, and describe the way the WORD-SPACE used in our later experiments is built automatically from text corpora. In Section 1.2 we define the logical connectives on vector spaces, focussing particularly on negation and disjunction. This introduces the basic material needed to understand the worked examples given in Section 1.3, and the document retrieval experiments described in Section 1.3.1. Section 1.4 gives a much fuller outline of the theory of quantum logic, the natural setting for the operators of Section 1.2. Finally, in Section 1.5, we examine the similarities between quantum logic and WORD-SPACE, asking whether quantum logic is an appropriate framework for modelling word-meanings or if the
+initial successes we have obtained are mainly coincidental.
+To some extent, this paper may have been written backwards, in that the implementation and examples are at the beginning and most of the theory is at the end. This is for two reasons. Firstly, we hoped to make the paper as accessible as possible and were afraid that beginning with an introduction to the full machinery of quantum logic would defeat this goal before the reader has a chance to realise that the techniques and equations used in this work are really quite elementary. Secondly, the link with ‘quantum logic’ was itself only brought to our attention after the bulk of the results in this paper had been obtained, and since this research is very much ongoing, we deemed it appropriate to give an honest account of its history and current state.
+We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art performance on our test set for measuring syntactic and semantic word similarities.
+Information Retrieval (IR) models need to deal with two difficult issues, vocabulary mismatch and term dependencies. Vocabulary mismatch corresponds to the difficulty of retrieving relevant documents that do not contain exact query terms but semantically related terms. Term dependencies refer to the need to consider the relationship between the words of the query when estimating the relevance of a document. A multitude of solutions has been proposed to solve each of these two problems, but no principled model solves both. In parallel, in the last few years, language models based on neural networks have been used to cope with complex natural language processing tasks like emotion and paraphrase detection. Although they present good abilities to cope with both term dependencies and vocabulary mismatch problems, thanks to the distributed representation of words they are based upon, such models could not be used readily in IR, where the estimation of one language model per document (or query) is required. This is both computationally unfeasible and prone to over-fitting. Based on a recent work that proposed to learn a generic language model that can be modified through a set of document-specific parameters, we explore the use of new neural network models that are adapted to ad-hoc IR tasks. Within the language model IR framework, we propose and study the use of a generic language model as well as a document-specific language model. Both can be used as a smoothing component, but the latter is more adapted to the document at hand and has the potential of being used as a full document language model. We experiment with such models and analyze their results on TREC-1 to 8 datasets.
+Bidirectional Long Short-Term Memory Recurrent Neural Network (BLSTM-RNN) has been shown to be very effective for modeling and predicting sequential data, e.g. speech utterances or handwritten documents. In this study, we propose to use BLSTM-RNN for a unified tagging solution that can be applied to various tagging tasks including part-of-speech tagging, chunking and named entity recognition. Instead of exploiting specific features carefully optimized for each task, our solution only uses one set of task-independent features and internal representations learnt from unlabeled text for all tasks. Requiring no task specific knowledge or sophisticated feature engineering, our approach gets nearly state-of-the-art performance in all these three tagging tasks.
+The recently introduced continuous Skip-gram model is an efficient method for learning high-quality distributed vector representations that capture a large number of precise syntactic and semantic word relationships. In this paper we present several extensions that improve both the quality of the vectors and the training speed. By subsampling of the frequent words we obtain significant speedup and also learn more regular word representations. We also describe a simple alternative to the hierarchical softmax called negative sampling.
+An inherent limitation of word representations is their indifference to word order and their inability to represent idiomatic phrases. For example, the meanings of “Canada” and “Air” cannot be easily combined to obtain “Air Canada”. Motivated by this example, we present a simple method for finding phrases in text, and show that learning good vector representations for millions of phrases is possible.
+We extend the word2vec framework to capture meaning across languages. The input consists of a source text and a word-aligned parallel text in a second language. The joint word2vec tool then represents words in both languages within a common “semantic” vector space. The result can be used to enrich lexicons of under-resourced languages, to identify ambiguities, and to perform clustering and classification. Experiments were conducted on a parallel English-Arabic corpus, as well as on English and Hebrew Biblical texts.
+Unsupervised vector-based approaches to semantics can model rich lexical meanings, but they largely fail to capture sentiment information that is central to many word meanings and important for a wide range of NLP tasks. We present a model that uses a mix of unsupervised and supervised techniques to learn word vectors capturing semantic term–document information as well as rich sentiment content. The proposed model can leverage both continuous and multi-dimensional sentiment information as well as non-sentiment annotations. We instantiate the model to utilize the document-level sentiment polarity annotations present in many online documents (e.g. star ratings). We evaluate the model using small, widely used sentiment and subjectivity corpora and find it out-performs several previously introduced methods for sentiment classification. We also introduce a large dataset of movie reviews to serve as a more robust benchmark for work in this area.
+We report our participation in the contextual suggestion track of TREC 2014 for which we submitted two runs using a novel approach to complete the competition. The goal of the track is to generate suggestions that users might be fond of, given the history of users’ preferences from where they used to live, when they travel to a new city. We tested our new approach on the ClueWeb12-CatB dataset, which has been pre-indexed by Lucene. Our system represents all attractions and user contexts in the continuous vector space learnt by neural network language models, and then we learn the user-dependent profile model to predict the user’s ratings for the attraction’s websites using Softmax. Finally, we rank all the venues by using the generated model according to the users’ personal preference.
+We present a comprehensive study of evaluation methods for unsupervised embedding techniques that obtain meaningful representations of words from text. Different evaluations result in different orderings of embedding methods, calling into question the common assumption that there is one single optimal vector representation. We present new evaluation techniques that directly compare embeddings with respect to specific queries. These methods reduce bias, provide greater insight, and allow us to solicit data-driven relevance judgments rapidly and accurately through crowdsourcing.
+Continuous word and phrase vectors have proven useful in a number of NLP tasks. Here we describe our experience using them as a source of features for the SemEval-2015 task 3, consisting of two community question answering subtasks: Answer Selection for categorizing answers as potential, good, and bad with regards to their corresponding questions; and YES/NO inference for predicting a yes, no, or unsure response to a YES/NO question using all of its good answers. Our system ranked 6th and 1st in the English answer selection and YES/NO inference subtasks respectively, and 2nd in the Arabic answer selection subtask.
+The word2vec model and application by Mikolov et al. have attracted a great amount of attention in the past two years. The vector representations of words learned by word2vec models have been proven to be able to carry semantic meanings and are useful in various NLP tasks. As an increasing number of researchers would like to experiment with word2vec, I notice that there is a lack of material that comprehensively explains the parameter learning process of word2vec in detail, thus preventing many people with less neural network experience from understanding how exactly word2vec works.
+This note provides detailed derivations and explanations of the parameter update equations for the word2vec models, including the original continuous bag-of-word (CBOW) and skip-gram models, as well as advanced tricks, hierarchical softmax and negative sampling. In the appendix a review is given on the basics of neural network models and backpropagation.
+Over the past few years, neural networks have re-emerged as powerful machine-learning
+models, yielding state-of-the-art results in fields such as image recognition and speech
+processing. More recently, neural network models started to be applied also to textual
+natural language signals, again with very promising results. This tutorial surveys neural
+network models from the perspective of natural language processing research, in an attempt
+to bring natural-language researchers up to speed with the neural techniques. The tutorial
+covers input encoding for natural language tasks, feed-forward networks, convolutional
+networks, recurrent networks and recursive networks, as well as the computation graph
+abstraction for automatic gradient computation
\ No newline at end of file

Propchange: labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt
------------------------------------------------------------------------------
    svn:eol-style = native
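
The abstracts collected above repeatedly refer to skip-gram training, in which each word in a window is paired with its surrounding context words. As a purely illustrative aside (not part of the committed code; the class and method names are hypothetical), such (target, context) pairs could be derived from a window like the fragments built in WordVectorsTest as follows:

    import java.util.ArrayList;
    import java.util.List;

    class SkipGramPairsSketch {

      // Pairs the middle token of a window (the target word) with every other token in the
      // window (its context words), yielding (target, context) training examples.
      static List<String[]> pairs(List<String> window) {
        List<String[]> out = new ArrayList<>();
        int center = window.size() / 2;
        String target = window.get(center);
        for (int i = 0; i < window.size(); i++) {
          if (i != center) {
            out.add(new String[]{target, window.get(i)});
          }
        }
        return out;
      }
    }

The CBOW variant described in the same abstracts reverses the roles, predicting the middle word from its surrounding context.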



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org

