mahout-commits mailing list archives

From vans...@apache.org
Subject [20/52] [partial] mahout git commit: removed all files except for website directory
Date Tue, 27 Jun 2017 16:14:45 GMT
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java b/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
deleted file mode 100644
index 8a226a0..0000000
--- a/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
+++ /dev/null
@@ -1,321 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.clustering;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.mahout.clustering.canopy.Canopy;
-import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.clustering.evaluation.ClusterEvaluator;
-import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
-import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
-import org.apache.mahout.clustering.kmeans.TestKmeansClustering;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.distance.DistanceMeasure;
-import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.VectorWritable;
-import org.junit.Before;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-public final class TestClusterEvaluator extends MahoutTestCase {
-  
-  private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {2, 2}, {3, 3}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
-  
-  private List<VectorWritable> referenceData = Lists.newArrayList();
-  
-  private final List<VectorWritable> sampleData = Lists.newArrayList();
-  
-  private Map<Integer,List<VectorWritable>> representativePoints;
-  
-  private List<Cluster> clusters;
-  
-  private static final Logger log = LoggerFactory.getLogger(TestClusterEvaluator.class);
-  
-  private Configuration conf;
-  
-  private FileSystem fs;
-  
-  private Path testdata;
-  
-  private Path output;
-  
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    conf = getConfiguration();
-    fs = FileSystem.get(conf);
-    testdata = getTestTempDirPath("testdata");
-    output = getTestTempDirPath("output");
-    // Create small reference data set
-    referenceData = TestKmeansClustering.getPointsWritable(REFERENCE);
-    // generate larger test data set for the clustering tests to chew on
-    generateSamples();
-  }
-  
-  /**
-   * Generate random samples and add them to the sampleData
-   * 
-   * @param num
-   *          int number of samples to generate
-   * @param mx
-   *          double x-value of the sample mean
-   * @param my
-   *          double y-value of the sample mean
-   * @param sd
-   *          double standard deviation of the samples
-   */
-  private void generateSamples(int num, double mx, double my, double sd) {
-    log.info("Generating {} samples m=[{}, {}] sd={}", num, mx, my, sd);
-    for (int i = 0; i < num; i++) {
-      sampleData.add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd)})));
-    }
-  }
-  
-  private void generateSamples() {
-    generateSamples(500, 1, 1, 3);
-    generateSamples(300, 1, 0, 0.5);
-    generateSamples(300, 0, 2, 0.1);
-  }
-  
-  private void printRepPoints(int numIterations) {
-    RepresentativePointsDriver.printRepresentativePoints(output, numIterations);
-  }
-  
-  /**
-   * Initialize synthetic data using four clusters dC units from the origin, each with four representative points offset dP from its center, plus the center itself
-   * 
-   * @param dC
-   *          a double cluster center offset
-   * @param dP
-   *          a double representative point offset
-   * @param measure
-   *          the DistanceMeasure
-   */
-  private void initData(double dC, double dP, DistanceMeasure measure) {
-    clusters = Lists.newArrayList();
-    clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
-    representativePoints = Maps.newHashMap();
-    for (Cluster cluster : clusters) {
-      List<VectorWritable> points = Lists.newArrayList();
-      representativePoints.put(cluster.getId(), points);
-      points.add(new VectorWritable(cluster.getCenter().clone()));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, dP}))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, -dP}))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {-dP, -dP}))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {-dP, dP}))));
-    }
-  }
-
-  @Test
-  public void testRepresentativePoints() throws Exception {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    Configuration conf = getConfiguration();
-    // run using MR reference point calculation
-    CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, true, 0.0, true);
-    int numIterations = 2;
-    Path clustersIn = new Path(output, "clusters-0-final");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure,
-        numIterations, false);
-    printRepPoints(numIterations);
-    ClusterEvaluator evaluatorMR = new ClusterEvaluator(conf, clustersIn);
-    // now run again using sequential reference point calculation
-    HadoopUtil.delete(conf, output);
-    CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, true, 0.0, true);
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure,
-        numIterations, true);
-    printRepPoints(numIterations);
-    ClusterEvaluator evaluatorSeq = new ClusterEvaluator(conf, clustersIn);
-    // compare results
-    assertEquals("InterCluster Density", evaluatorMR.interClusterDensity(), evaluatorSeq.interClusterDensity(), EPSILON);
-    assertEquals("IntraCluster Density", evaluatorMR.intraClusterDensity(), evaluatorSeq.intraClusterDensity(), EPSILON);
-  }
-  
-  @Test
-  public void testCluster0() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.33333333333333315, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
-  }
-  
-  @Test
-  public void testCluster1() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.5, measure);
-    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.33333333333333315, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
-  }
-  
-  @Test
-  public void testCluster2() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.75, measure);
-    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.33333333333333315, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
-  }
-  
-  /**
-   * Adding an empty cluster should modify the inter-cluster density but not the intra-cluster density, since that
-   * cluster's intra-cluster density would be NaN and NaN values are ignored by the evaluator
-   * 
-   * @throws IOException
-   */
-  @Test
-  public void testEmptyCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = Lists.newArrayList();
-    representativePoints.put(cluster.getId(), points);
-    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.371534146934532, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
-  }
-  
-  /**
-   * Adding a single-valued cluster should modify the inter-cluster density but not the intra-cluster density, since
-   * that cluster's intra-cluster density would be NaN and NaN values are ignored by the evaluator
-   * 
-   * @throws IOException
-   */
-  @Test
-  public void testSingleValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = Lists.newArrayList();
-    points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {1, 1}))));
-    representativePoints.put(cluster.getId(), points);
-    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.3656854249492381, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
-  }
-  
-  /**
-   * Representative points extraction will duplicate the cluster center if the cluster has no assigned points. These
-   * clusters are included in the inter-cluster density but their NaN intra-density values are ignored by the evaluator.
-   * 
-   * @throws IOException
-   */
-  @Test
-  public void testAllSameValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = Lists.newArrayList();
-    points.add(new VectorWritable(cluster.getCenter()));
-    points.add(new VectorWritable(cluster.getCenter()));
-    points.add(new VectorWritable(cluster.getCenter()));
-    representativePoints.put(cluster.getId(), points);
-    ClusterEvaluator evaluator = new ClusterEvaluator(representativePoints, clusters, measure);
-    assertEquals("inter cluster density", 0.3656854249492381, evaluator.interClusterDensity(), EPSILON);
-    assertEquals("intra cluster density", 0.3656854249492381, evaluator.intraClusterDensity(), EPSILON);
-  }
-  
-  @Test
-  public void testCanopy() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    Configuration conf = getConfiguration();
-    CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, true, 0.0, true);
-    int numIterations = 10;
-    Path clustersIn = new Path(output, "clusters-0-final");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure,
-        numIterations, true);
-    //printRepPoints(numIterations);
-    ClusterEvaluator evaluator = new ClusterEvaluator(conf, clustersIn);
-    // now print out the Results
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-  }
-  
-  @Test
-  public void testKmeans() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    // now run the Canopy job to prime kMeans canopies
-    Configuration conf = getConfiguration();
-    CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, false, 0.0, true);
-    // now run the KMeans job
-    Path kmeansOutput = new Path(output, "kmeans");
-    KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, true);
-    int numIterations = 10;
-    Path clustersIn = new Path(kmeansOutput, "clusters-2");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
-        numIterations, true);
-    RepresentativePointsDriver.printRepresentativePoints(kmeansOutput, numIterations);
-    ClusterEvaluator evaluator = new ClusterEvaluator(conf, clustersIn);
-    // now print out the Results
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-  }
-  
-  @Test
-  public void testFuzzyKmeans() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, new Path(testdata, "file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    // now run the Canopy job to prime kMeans canopies
-    Configuration conf = getConfiguration();
-    CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, false, 0.0, true);
-    Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
-    // now run the FuzzyKMeans job
-    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, 0.001, 10, 2,
-        true, true, 0, true);
-    int numIterations = 10;
-    Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(fuzzyKMeansOutput, "clusteredPoints"), fuzzyKMeansOutput,
-        measure, numIterations, true);
-    RepresentativePointsDriver.printRepresentativePoints(fuzzyKMeansOutput, numIterations);
-    ClusterEvaluator evaluator = new ClusterEvaluator(conf, clustersIn);
-    // now print out the Results
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-  }
-  
-}
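
For context, the test above drives ClusterEvaluator entirely in memory, with no Hadoop job. A minimal sketch of that usage pattern, assuming the pre-removal evaluation API exercised in the diff (Canopy, ClusterEvaluator, EuclideanDistanceMeasure) and a hypothetical two-cluster setup:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.canopy.Canopy;
import org.apache.mahout.clustering.evaluation.ClusterEvaluator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.VectorWritable;

public class ClusterEvaluatorSketch {  // illustrative class name
  public static void main(String[] args) {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // Two toy canopies, mirroring the shape of initData() above.
    List<Cluster> clusters = new ArrayList<>();
    clusters.add(new Canopy(new DenseVector(new double[] {-1, -1}), 1, measure));
    clusters.add(new Canopy(new DenseVector(new double[] {1, 1}), 3, measure));
    // Each cluster's representative points: its center plus two offsets,
    // in the style of initData(1, 0.25, measure).
    Map<Integer, List<VectorWritable>> repPoints = new HashMap<>();
    for (Cluster c : clusters) {
      List<VectorWritable> points = new ArrayList<>();
      points.add(new VectorWritable(c.getCenter().clone()));
      points.add(new VectorWritable(c.getCenter().plus(new DenseVector(new double[] {0.25, 0.25}))));
      points.add(new VectorWritable(c.getCenter().plus(new DenseVector(new double[] {-0.25, -0.25}))));
      repPoints.put(c.getId(), points);
    }
    ClusterEvaluator evaluator = new ClusterEvaluator(repPoints, clusters, measure);
    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
  }
}

Empty or single-point clusters produce NaN densities, which the evaluator skips, as the testEmptyCluster and testSingleValueCluster cases above assert.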

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java b/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
deleted file mode 100644
index 597ed01..0000000
--- a/integration/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.clustering.cdbw;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.mahout.clustering.Cluster;
-import org.apache.mahout.clustering.ClusteringTestUtils;
-import org.apache.mahout.clustering.TestClusterEvaluator;
-import org.apache.mahout.clustering.UncommonDistributions;
-import org.apache.mahout.clustering.canopy.Canopy;
-import org.apache.mahout.clustering.canopy.CanopyDriver;
-import org.apache.mahout.clustering.evaluation.RepresentativePointsDriver;
-import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
-import org.apache.mahout.clustering.kmeans.TestKmeansClustering;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.distance.DistanceMeasure;
-import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.junit.Before;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public final class TestCDbwEvaluator extends MahoutTestCase {
-  
-  private static final double[][] REFERENCE = { {1, 1}, {2, 1}, {1, 2}, {2, 2}, {3, 3}, {4, 4}, {5, 4}, {4, 5}, {5, 5}};
-  
-  private static final Logger log = LoggerFactory.getLogger(TestCDbwEvaluator.class);
-  
-  private Map<Integer,List<VectorWritable>> representativePoints;
-  
-  private List<Cluster> clusters;
-  
-  private Configuration conf;
-  
-  private FileSystem fs;
-  
-  private final Collection<VectorWritable> sampleData = new ArrayList<>();
-  
-  private List<VectorWritable> referenceData = new ArrayList<>();
-  
-  private Path testdata;
-  
-  private Path output;
-  
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    conf = getConfiguration();
-    fs = FileSystem.get(conf);
-    testdata = getTestTempDirPath("testdata");
-    output = getTestTempDirPath("output");
-    // Create small reference data set
-    referenceData = TestKmeansClustering.getPointsWritable(REFERENCE);
-    // generate larger test data set for the clustering tests to chew on
-    generateSamples();
-  }
-  
-  /**
-   * Initialize synthetic data using four clusters dC units from the origin, each with four representative points offset dP from its center, plus the center itself
-   * 
-   * @param dC
-   *          a double cluster center offset
-   * @param dP
-   *          a double representative point offset
-   * @param measure
-   *          the DistanceMeasure
-   */
-  private void initData(double dC, double dP, DistanceMeasure measure) {
-    clusters = new ArrayList<>();
-    clusters.add(new Canopy(new DenseVector(new double[] {-dC, -dC}), 1, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] {-dC, dC}), 3, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] {dC, dC}), 5, measure));
-    clusters.add(new Canopy(new DenseVector(new double[] {dC, -dC}), 7, measure));
-    representativePoints = new HashMap<>();
-    for (Cluster cluster : clusters) {
-      List<VectorWritable> points = new ArrayList<>();
-      representativePoints.put(cluster.getId(), points);
-      points.add(new VectorWritable(cluster.getCenter().clone()));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, dP}))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {dP, -dP}))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {-dP, -dP}))));
-      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {-dP, dP}))));
-    }
-  }
-  
-  /**
-   * Generate random samples and add them to the sampleData
-   * 
-   * @param num
-   *          int number of samples to generate
-   * @param mx
-   *          double x-value of the sample mean
-   * @param my
-   *          double y-value of the sample mean
-   * @param sd
-   *          double standard deviation of the samples
-   */
-  private void generateSamples(int num, double mx, double my, double sd) {
-    log.info("Generating {} samples m=[{}, {}] sd={}", num, mx, my, sd);
-    for (int i = 0; i < num; i++) {
-      sampleData.add(new VectorWritable(new DenseVector(new double[] {UncommonDistributions.rNorm(mx, sd),
-          UncommonDistributions.rNorm(my, sd)})));
-    }
-  }
-  
-  private void generateSamples() {
-    generateSamples(500, 1, 1, 3);
-    generateSamples(300, 1, 0, 0.5);
-    generateSamples(300, 0, 2, 0.1);
-  }
-  
-  @Test
-  public void testCDbw0() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testCDbw1() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.5, measure);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testCDbw2() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.75, measure);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testEmptyCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {10, 10}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = new ArrayList<>();
-    representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testSingleValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = new ArrayList<>();
-    points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(new double[] {1, 1}))));
-    representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  /**
-   * Representative points extraction will duplicate the cluster center if the cluster has no assigned points. These
-   * clusters should be ignored, like the empty clusters above
-   * 
-   * @throws IOException
-   */
-  @Test
-  public void testAllSameValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = new ArrayList<>();
-    points.add(new VectorWritable(cluster.getCenter()));
-    points.add(new VectorWritable(cluster.getCenter()));
-    points.add(new VectorWritable(cluster.getCenter()));
-    representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  /**
-   * Clustering can produce extremely tight clusters that can cause the standard-deviation calculation to fail. These clusters should
-   * be processed correctly.
-   * 
-   * @throws IOException
-   */
-  @Test
-  public void testAlmostSameValueCluster() throws IOException {
-    ClusteringTestUtils.writePointsToFile(referenceData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    initData(1, 0.25, measure);
-    Canopy cluster = new Canopy(new DenseVector(new double[] {0, 0}), 19, measure);
-    clusters.add(cluster);
-    List<VectorWritable> points = new ArrayList<>();
-    Vector delta = new DenseVector(new double[] {0, Double.MIN_NORMAL});
-    points.add(new VectorWritable(delta.clone()));
-    points.add(new VectorWritable(delta.clone()));
-    points.add(new VectorWritable(delta.clone()));
-    points.add(new VectorWritable(delta.clone()));
-    points.add(new VectorWritable(delta.clone()));
-    representativePoints.put(cluster.getId(), points);
-    CDbwEvaluator evaluator = new CDbwEvaluator(representativePoints, clusters, measure);
-    System.out.println("CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testCanopy() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, true, 0.0, true);
-    int numIterations = 10;
-    Path clustersIn = new Path(output, "clusters-0-final");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure,
-        numIterations, true);
-    CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    // printRepPoints(numIterations);
-    // now print out the Results
-    System.out.println("Canopy CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testKmeans() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
-    // now run the KMeans job
-    Path kmeansOutput = new Path(output, "kmeans");
-    KMeansDriver.run(testdata, new Path(output, "clusters-0-final"), kmeansOutput, 0.001, 10, true, 0.0, true);
-    int numIterations = 10;
-    Path clustersIn = new Path(kmeansOutput, "clusters-10-final");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(kmeansOutput, "clusteredPoints"), kmeansOutput, measure,
-        numIterations, true);
-    CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    RepresentativePointsDriver.printRepresentativePoints(kmeansOutput, numIterations);
-    // now print out the Results
-    System.out.println("K-Means CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-  @Test
-  public void testFuzzyKmeans() throws Exception {
-    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
-    DistanceMeasure measure = new EuclideanDistanceMeasure();
-    // now run the Canopy job to prime kMeans canopies
-    CanopyDriver.run(getConfiguration(), testdata, output, measure, 3.1, 2.1, false, 0.0, true);
-    Path fuzzyKMeansOutput = new Path(output, "fuzzyk");
-    // now run the FuzzyKMeans job
-    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0-final"), fuzzyKMeansOutput, 0.001, 10, 2,
-        true, true, 0, true);
-    int numIterations = 10;
-    Path clustersIn = new Path(fuzzyKMeansOutput, "clusters-4");
-    RepresentativePointsDriver.run(conf, clustersIn, new Path(fuzzyKMeansOutput, "clusteredPoints"), fuzzyKMeansOutput,
-        measure, numIterations, true);
-    CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
-    RepresentativePointsDriver.printRepresentativePoints(fuzzyKMeansOutput, numIterations);
-    // now print out the Results
-    System.out.println("Fuzzy K-Means CDbw = " + evaluator.getCDbw());
-    System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
-    System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
-    System.out.println("Separation = " + evaluator.separation());
-  }
-  
-}
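
The CDbw tests repeat the same in-memory pattern with a richer set of metrics. A minimal sketch, reusing the clusters, repPoints, and measure variables from the ClusterEvaluator sketch above; only the evaluator type changes, and the constructor shape matches the deleted tests:

// import org.apache.mahout.clustering.cdbw.CDbwEvaluator;
CDbwEvaluator evaluator = new CDbwEvaluator(repPoints, clusters, measure);
System.out.println("CDbw = " + evaluator.getCDbw());  // composite validity score
System.out.println("Intra-cluster density = " + evaluator.intraClusterDensity());
System.out.println("Inter-cluster density = " + evaluator.interClusterDensity());
System.out.println("Separation = " + evaluator.separation());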

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/MailArchivesClusteringAnalyzerTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/MailArchivesClusteringAnalyzerTest.java b/integration/src/test/java/org/apache/mahout/text/MailArchivesClusteringAnalyzerTest.java
deleted file mode 100644
index ba73c82..0000000
--- a/integration/src/test/java/org/apache/mahout/text/MailArchivesClusteringAnalyzerTest.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.text;
-
-import java.io.Reader;
-import java.io.StringReader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-/**
- * Unit tests for the MailArchivesClusteringAnalyzer text analyzer.
- */
-public class MailArchivesClusteringAnalyzerTest extends MahoutTestCase {
-  
-  @Test
-  public void testAnalysis() throws Exception {
-    Analyzer analyzer = new MailArchivesClusteringAnalyzer();
-    
-    String text = "A test message\n"
-                  + "atokenthatistoolongtobeusefulforclustertextanalysis\n"
-                  + "Mahout is a scalable, machine-learning LIBRARY\n"
-                  + "we've added some additional stopwords such as html, mailto, regards\t"
-                  + "apache_hadoop provides the foundation for scalability\n"
-                  + "www.nabble.com general-help@incubator.apache.org\n"
-                  + "public void int protected package";
-    Reader reader = new StringReader(text);
-    
-    // if you change the text above, then you may need to change this as well
-    // order matters too
-    String[] expectedTokens = {
-        "test", "mahout", "scalabl", "machin", "learn", "librari", "weve", "ad",
-        "stopword", "apache_hadoop","provid", "foundat", "scalabl"
-    };
-        
-    TokenStream tokenStream = analyzer.tokenStream("test", reader);
-    assertNotNull(tokenStream);
-    tokenStream.reset();
-    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
-    int e = 0;
-    while (tokenStream.incrementToken() && e < expectedTokens.length) {
-      assertEquals(expectedTokens[e++], termAtt.toString());
-    }
-    assertEquals(expectedTokens.length, e);
-    tokenStream.end();
-    tokenStream.close();
-  }
-}
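
The deleted analyzer test follows Lucene's standard TokenStream consumption contract: reset(), an incrementToken() loop, then end() and close(). A minimal sketch of that contract as a reusable helper, assuming any Analyzer implementation (the class name and field name are illustrative):

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class TokenStreamSketch {  // illustrative class name
  /** Collect every term an analyzer emits for the given text. */
  static List<String> tokens(Analyzer analyzer, String text) throws IOException {
    List<String> result = new ArrayList<>();
    try (TokenStream stream = analyzer.tokenStream("test", new StringReader(text))) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();                 // required before the first incrementToken()
      while (stream.incrementToken()) {
        result.add(term.toString());
      }
      stream.end();                   // required before close()
    }
    return result;
  }
}

With MailArchivesClusteringAnalyzer, the stemmed output (e.g. "scalabl", "librari") is what the expectedTokens array above encodes.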

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
deleted file mode 100644
index ef2b8a6..0000000
--- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromMailArchivesTest.java
+++ /dev/null
@@ -1,240 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.util.zip.GZIPOutputStream;
-
-import org.apache.commons.lang3.SystemUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Test case for the SequenceFilesFromMailArchives command-line application.
- */
-public final class SequenceFilesFromMailArchivesTest extends MahoutTestCase {
-
-  private File inputDir;
-
-  /**
-   * Create the input and output directories needed for testing
-   * the SequenceFilesFromMailArchives application.
-   */
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    inputDir = getTestTempDir("mail-archives-in");
-
-    // write test mail messages to a gzipped file in a nested directory
-    File subDir = new File(inputDir, "subdir");
-    subDir.mkdir();
-    File gzFile = new File(subDir, "mail-messages.gz");
-    try (GZIPOutputStream gzOut = new GZIPOutputStream(new FileOutputStream(gzFile))) {
-      gzOut.write(testMailMessages.getBytes("UTF-8"));
-      gzOut.finish();
-    }
-    
-    File subDir2 = new File(subDir, "subsubdir");
-    subDir2.mkdir();
-    File gzFile2 = new File(subDir2, "mail-messages-2.gz");
-    try (GZIPOutputStream gzOut = new GZIPOutputStream(new FileOutputStream(gzFile2))) {
-      gzOut.write(testMailMessages.getBytes("UTF-8"));
-      gzOut.finish();
-    }
-  }
-
-  @Test
-  public void testSequential() throws Exception {
-
-    File outputDir = this.getTestTempDir("mail-archives-out");
-
-    String[] args = {
-      "--input", inputDir.getAbsolutePath(),
-      "--output", outputDir.getAbsolutePath(),
-      "--charset", "UTF-8",
-      "--keyPrefix", "TEST",
-      "--method", "sequential",
-      "--body", "--subject", "--separator", ""
-    };
-
-    // run the application's main method
-    SequenceFilesFromMailArchives.main(args);
-
-    // app should create a single SequenceFile named "chunk-0" in the output dir
-    File expectedChunkFile = new File(outputDir, "chunk-0");
-    String expectedChunkPath = expectedChunkFile.getAbsolutePath();
-    Assert.assertTrue("Expected chunk file " + expectedChunkPath + " not found!", expectedChunkFile.isFile());
-
-    Configuration conf = getConfiguration();
-    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(new Path(expectedChunkPath), true, conf);
-    Assert.assertTrue("First key/value pair not found!", iterator.hasNext());
-    Pair<Text, Text> record = iterator.next();
-
-    File parentFile = new File(new File(new File("TEST"), "subdir"), "mail-messages.gz");
-    Assert.assertEquals(new File(parentFile, testVars[0][0]).toString(), record.getFirst().toString());
-    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
-
-    Assert.assertTrue("Second key/value pair not found!", iterator.hasNext());
-
-    record = iterator.next();
-    Assert.assertEquals(new File(parentFile, testVars[1][0]).toString(), record.getFirst().toString());
-    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());
-
-    record = iterator.next();
-    File parentFileSubSubDir = new File(new File(new File(new File("TEST"), "subdir"), "subsubdir"), "mail-messages-2.gz");
-    Assert.assertEquals(new File(parentFileSubSubDir, testVars[0][0]).toString(), record.getFirst().toString());
-    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
-
-    Assert.assertTrue("Fourth key/value pair not found!", iterator.hasNext());
-    record = iterator.next();
-    Assert.assertEquals(new File(parentFileSubSubDir, testVars[1][0]).toString(), record.getFirst().toString());
-    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());
-
-    Assert.assertFalse("Only four key/value pairs expected!", iterator.hasNext());
-  }
-
-  @Test
-  public void testMapReduce() throws Exception {
-
-    Path tmpDir = getTestTempDirPath();
-    Path mrOutputDir = new Path(tmpDir, "mail-archives-out-mr");
-    Configuration configuration = getConfiguration();
-    FileSystem fs = FileSystem.get(configuration);
-
-    File expectedInputFile = new File(inputDir.toString());
-
-    String[] args = {
-      "-Dhadoop.tmp.dir=" + configuration.get("hadoop.tmp.dir"),
-      "--input", expectedInputFile.getAbsolutePath(),
-      "--output", mrOutputDir.toString(),
-      "--charset", "UTF-8",
-      "--keyPrefix", "TEST",
-      "--method", "mapreduce",
-      "--body", "--subject", "--separator", ""
-    };
-
-    // run the application's main method
-    SequenceFilesFromMailArchives.main(args);
-
-    // the MapReduce job should write a single SequenceFile named "part-m-00000" in the output dir
-    FileStatus[] fileStatuses = fs.listStatus(mrOutputDir.suffix("/part-m-00000"));
-    assertEquals(1, fileStatuses.length); // only one
-    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
-    SequenceFileIterator<Text, Text> iterator =
-      new SequenceFileIterator<>(mrOutputDir.suffix("/part-m-00000"), true, configuration);
-
-    Assert.assertTrue("First key/value pair not found!", iterator.hasNext());
-    Pair<Text, Text> record = iterator.next();
-
-    File parentFileSubSubDir = new File(new File(new File(new File("TEST"), "subdir"), "subsubdir"), "mail-messages-2.gz");
-
-    String actual = record.getFirst().toString();
-    if (SystemUtils.IS_OS_WINDOWS) {
-      actual = actual.replace("/", "\\");
-    }
-    Assert.assertEquals(new File(parentFileSubSubDir, testVars[0][0]).toString(), actual);
-    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
-    Assert.assertTrue("Second key/value pair not found!", iterator.hasNext());
-
-    record = iterator.next();
-    actual = record.getFirst().toString();
-    if (SystemUtils.IS_OS_WINDOWS) {
-      actual = actual.replace("/", "\\");
-    }
-    Assert.assertEquals(new File(parentFileSubSubDir, testVars[1][0]).toString(), actual);
-    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());
-
-    // test other file
-    File parentFile = new File(new File(new File("TEST"), "subdir"), "mail-messages.gz");
-    record = iterator.next();
-    actual = record.getFirst().toString();
-    if (SystemUtils.IS_OS_WINDOWS) {
-      actual = actual.replace("/", "\\");
-    }
-    Assert.assertEquals(new File(parentFile, testVars[0][0]).toString(), actual);
-    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
-    Assert.assertTrue("Fourth key/value pair not found!", iterator.hasNext());
-
-    record = iterator.next();
-    actual = record.getFirst().toString();
-    if (SystemUtils.IS_OS_WINDOWS) {
-      actual = actual.replace("/", "\\");
-    }
-    Assert.assertEquals(new File(parentFile, testVars[1][0]).toString(), actual);
-    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());
-    Assert.assertFalse("Only four key/value pairs expected!", iterator.hasNext());
-  }
-
-  // Messages extracted and made anonymous from the ASF mail archives
-  private static final String[][] testVars = {
-    new String[] {
-      "user@example.com",
-      "Ant task for JDK1.1 collections build option",
-      "\nThis is just a test message\n--\nTesty McTester\n"
-    },
-    new String[] {
-      "somebody@example.com",
-      "Problem with build files in several directories",
-      "\nHi all,\nThis is another test message.\nRegards,\nAnother Test\n"
-    }
-  };
-
-  private static final String testMailMessages =
-    "From user@example.com  Mon Jul 24 19:13:53 2000\n"
-      + "Return-Path: <user@example.com>\n"
-      + "Mailing-List: contact ant-user-help@jakarta.apache.org; run by ezmlm\n"
-      + "Delivered-To: mailing list ant-user@jakarta.apache.org\n"
-      + "Received: (qmail 49267 invoked from network); 24 Jul 2000 19:13:53 -0000\n"
-      + "Message-ID: <" + testVars[0][0] + ">\n"
-      + "From: \"Testy McTester\" <user@example.com>\n"
-      + "To: <ant-user@jakarta.apache.org>\n"
-      + "Subject: " + testVars[0][1] + '\n'
-      + "Date: Mon, 24 Jul 2000 12:24:56 -0700\n"
-      + "MIME-Version: 1.0\n"
-      + "Content-Type: text/plain;\n"
-      + "  charset=\"Windows-1252\"\n"
-      + "Content-Transfer-Encoding: 7bit\n"
-      + "X-Spam-Rating: locus.apache.org 1.6.2 0/1000/N\n"
-      + testVars[0][2] + '\n'
-      + "From somebody@example.com  Wed Jul 26 11:32:16 2000\n"
-      + "Return-Path: <somebody@example.com>\n"
-      + "Mailing-List: contact ant-user-help@jakarta.apache.org; run by ezmlm\n"
-      + "Delivered-To: mailing list ant-user@jakarta.apache.org\n"
-      + "Received: (qmail 73966 invoked from network); 26 Jul 2000 11:32:16 -0000\n"
-      + "User-Agent: Microsoft-Outlook-Express-Macintosh-Edition/5.02.2022\n"
-      + "Date: Wed, 26 Jul 2000 13:32:08 +0200\n"
-      + "Subject: " + testVars[1][1] + '\n'
-      + "From: Another Test <somebody@example.com>\n"
-      + "To: <ant-user@jakarta.apache.org>\n"
-      + "Message-Id: <" + testVars[1][0] + ">\n"
-      + "Mime-Version: 1.0\n"
-      + "Content-Type: text/plain; charset=\"US-ASCII\"\n"
-      + "Content-Transfer-Encoding: 7bit\n"
-      + "X-Spam-Rating: locus.apache.org 1.6.2 0/1000/N\n"
-      + testVars[1][2];
-}
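
Both of the deleted tests above verify output by replaying it through Mahout's SequenceFileIterator. A minimal sketch of that read loop, assuming a Text/Text sequence file such as the chunk-0 output the sequential test checks (the class name is illustrative, and the boolean argument mirrors its use in the tests above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;

public final class SequenceFileReadSketch {  // illustrative class name
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path chunk = new Path(args[0]);  // e.g. the "chunk-0" file asserted on above
    try (SequenceFileIterator<Text, Text> it = new SequenceFileIterator<>(chunk, true, conf)) {
      while (it.hasNext()) {
        Pair<Text, Text> record = it.next();
        System.out.println(record.getFirst() + " => "
            + record.getSecond().toString().length() + " chars");
      }
    }
  }
}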

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/TestPathFilter.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/TestPathFilter.java b/integration/src/test/java/org/apache/mahout/text/TestPathFilter.java
deleted file mode 100644
index 227521a..0000000
--- a/integration/src/test/java/org/apache/mahout/text/TestPathFilter.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-
-/**
- * Dummy Path Filter for testing the MapReduce version of
- * SequenceFilesFromDirectory
- */
-public class TestPathFilter implements PathFilter {
-
-  @Override
-  public boolean accept(Path path) {
-    return path.getName().startsWith("t") || path.getName().startsWith("r") || path.getName().startsWith("f");
-  }
-}
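
A filter like TestPathFilter takes effect when passed to FileSystem.listStatus; the deleted TestSequenceFilesFromDirectory below also wires it in by class name via --fileFilterClass. A minimal sketch, assuming an illustrative driver class and a directory path supplied as the first argument:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.text.TestPathFilter;

public final class PathFilterSketch {  // illustrative class name
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // listStatus applies the filter to each candidate path; only names starting
    // with 't', 'r', or 'f' survive TestPathFilter.accept().
    for (FileStatus status : fs.listStatus(new Path(args[0]), new TestPathFilter())) {
      System.out.println(status.getPath());
    }
  }
}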

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java b/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
deleted file mode 100644
index 040c8e4..0000000
--- a/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.text;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.io.Charsets;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterator;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public final class TestSequenceFilesFromDirectory extends MahoutTestCase {
-
-  private static final Logger logger = LoggerFactory.getLogger(TestSequenceFilesFromDirectory.class);
-
-  private static final String[][] DATA1 = {
-    {"test1", "This is the first text."},
-    {"test2", "This is the second text."},
-    {"test3", "This is the third text."}
-  };
-
-  private static final String[][] DATA2 = {
-    {"recursive_test1", "This is the first text."},
-    {"recursive_test2", "This is the second text."},
-    {"recursive_test3", "This is the third text."}
-  };
-
-  @Test
-  public void testSequenceFileFromDirectoryBasic() throws Exception {
-    // parameters
-    Configuration configuration = getConfiguration();
-
-    FileSystem fs = FileSystem.get(configuration);
-
-    // create
-    Path tmpDir = this.getTestTempDirPath();
-    Path inputDir = new Path(tmpDir, "inputDir");
-    fs.mkdirs(inputDir);
-
-    Path outputDir = new Path(tmpDir, "outputDir");
-    Path outputDirRecursive = new Path(tmpDir, "outputDirRecursive");
-
-    Path inputDirRecursive = new Path(tmpDir, "inputDirRecur");
-    fs.mkdirs(inputDirRecursive);
-
-    // prepare input files
-    createFilesFromArrays(configuration, inputDir, DATA1);
-
-    SequenceFilesFromDirectory.main(new String[]{
-      "--input", inputDir.toString(),
-      "--output", outputDir.toString(),
-      "--chunkSize", "64",
-      "--charset", Charsets.UTF_8.name(),
-      "--keyPrefix", "UID",
-      "--method", "sequential"});
-
-    // check output chunk files
-    checkChunkFiles(configuration, outputDir, DATA1, "UID");
-
-    createRecursiveDirFilesFromArrays(configuration, inputDirRecursive, DATA2);
-
-    FileStatus fstInputPath = fs.getFileStatus(inputDirRecursive);
-    String dirs = HadoopUtil.buildDirList(fs, fstInputPath);
-
-    System.out.println("\n\n ----- recursive dirs: " + dirs);
-    SequenceFilesFromDirectory.main(new String[]{
-      "--input", inputDirRecursive.toString(),
-      "--output", outputDirRecursive.toString(),
-      "--chunkSize", "64",
-      "--charset", Charsets.UTF_8.name(),
-      "--keyPrefix", "UID",
-      "--method", "sequential"});
-
-    checkRecursiveChunkFiles(configuration, outputDirRecursive, DATA2, "UID");
-  }
-
-  @Test
-  public void testSequenceFileFromDirectoryMapReduce() throws Exception {
-
-    Configuration conf = getConfiguration();
-
-    FileSystem fs = FileSystem.get(conf);
-
-    // create
-    Path tmpDir = this.getTestTempDirPath();
-    Path inputDir = new Path(tmpDir, "inputDir");
-    fs.mkdirs(inputDir);
-
-    Path inputDirRecur = new Path(tmpDir, "inputDirRecur");
-    fs.mkdirs(inputDirRecur);
-
-    Path mrOutputDir = new Path(tmpDir, "mrOutputDir");
-    Path mrOutputDirRecur = new Path(tmpDir, "mrOutputDirRecur");
-
-    createFilesFromArrays(conf, inputDir, DATA1);
-
-    SequenceFilesFromDirectory.main(new String[]{
-      "-Dhadoop.tmp.dir=" + conf.get("hadoop.tmp.dir"),
-      "--input", inputDir.toString(),
-      "--output", mrOutputDir.toString(),
-      "--chunkSize", "64",
-      "--charset", Charsets.UTF_8.name(),
-      "--method", "mapreduce",
-      "--keyPrefix", "UID",
-      "--fileFilterClass", "org.apache.mahout.text.TestPathFilter"
-    });
-
-    checkMRResultFiles(conf, mrOutputDir, DATA1, "UID");
-
-    createRecursiveDirFilesFromArrays(conf, inputDirRecur, DATA2);
-
-    FileStatus fstInputPath = fs.getFileStatus(inputDirRecur);
-    String dirs = HadoopUtil.buildDirList(fs, fstInputPath);
-
-    logger.info("\n\n ---- recursive dirs: {}", dirs);
-
-    SequenceFilesFromDirectory.main(new String[]{
-      "-Dhadoop.tmp.dir=" + conf.get("hadoop.tmp.dir"),
-      "--input", inputDirRecur.toString(),
-      "--output", mrOutputDirRecur.toString(),
-      "--chunkSize", "64",
-      "--charset", Charsets.UTF_8.name(),
-      "--method", "mapreduce",
-      "--keyPrefix", "UID",
-      "--fileFilterClass", "org.apache.mahout.text.TestPathFilter"
-    });
-
-    checkMRResultFilesRecursive(conf, mrOutputDirRecur, DATA2, "UID");
-  }
-
-
-  private static void createFilesFromArrays(Configuration conf, Path inputDir, String[][] data) throws IOException {
-    FileSystem fs = FileSystem.get(conf);
-    for (String[] aData : data) {
-      try (OutputStreamWriter writer =
-               new OutputStreamWriter(fs.create(new Path(inputDir, aData[0])), Charsets.UTF_8)){
-        writer.write(aData[1]);
-      }
-    }
-  }
-
-  private static void createRecursiveDirFilesFromArrays(Configuration configuration, Path inputDir,
-                                                        String[][] data) throws IOException {
-    FileSystem fs = FileSystem.get(configuration);
-
-    logger.info("createRecursiveDirFilesFromArrays > based on: {}", inputDir.toString());
-    Path curPath;
-    String currentRecursiveDir = inputDir.toString();
-
-    for (String[] aData : data) {
-      currentRecursiveDir += "/" + aData[0];
-      File subDir = new File(currentRecursiveDir);
-      subDir.mkdir();
-
-      curPath = new Path(subDir.toString(), "file.txt");
-      logger.info("Created file: {}", curPath.toString());
-
-      try (OutputStreamWriter writer = new OutputStreamWriter(fs.create(curPath), Charsets.UTF_8)){
-        writer.write(aData[1]);
-      }
-    }
-  }
-
-  private static void checkChunkFiles(Configuration configuration,
-                                      Path outputDir,
-                                      String[][] data,
-                                      String prefix) throws IOException {
-    FileSystem fs = FileSystem.get(configuration);
-
-    // output exists?
-    FileStatus[] fileStatuses = fs.listStatus(outputDir, PathFilters.logsCRCFilter());
-    assertEquals(1, fileStatuses.length); // only one
-    assertEquals("chunk-0", fileStatuses[0].getPath().getName());
-
-    Map<String, String> fileToData = new HashMap<>();
-    for (String[] aData : data) {
-      fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]);
-    }
-
-    // read a chunk to check content
-    try (SequenceFileIterator<Text, Text> iterator =
-             new SequenceFileIterator<>(fileStatuses[0].getPath(), true, configuration)){
-      while (iterator.hasNext()) {
-        Pair<Text, Text> record = iterator.next();
-        String retrievedData = fileToData.get(record.getFirst().toString().trim());
-        assertNotNull(retrievedData);
-        assertEquals(retrievedData, record.getSecond().toString().trim());
-      }
-    }
-  }
-
-  private static void checkRecursiveChunkFiles(Configuration configuration,
-                                               Path outputDir,
-                                               String[][] data,
-                                               String prefix) throws IOException {
-    FileSystem fs = FileSystem.get(configuration);
-
-    logger.info("Checking recursive chunk files");
-
-    // output exists?
-    FileStatus[] fileStatuses = fs.listStatus(outputDir, PathFilters.logsCRCFilter());
-    assertEquals(1, fileStatuses.length); // only one
-    assertEquals("chunk-0", fileStatuses[0].getPath().getName());
-
-    Map<String, String> fileToData = new HashMap<>();
-    String currentPath = prefix;
-    for (String[] aData : data) {
-      currentPath += Path.SEPARATOR + aData[0];
-      fileToData.put(currentPath + Path.SEPARATOR + "file.txt", aData[1]);
-    }
-
-    // read a chunk to check content
-    try (SequenceFileIterator<Text, Text> iterator =
-             new SequenceFileIterator<>(fileStatuses[0].getPath(), true, configuration)) {
-      while (iterator.hasNext()) {
-        Pair<Text, Text> record = iterator.next();
-        String retrievedData = fileToData.get(record.getFirst().toString().trim());
-        System.out.printf("%s >> %s\n", record.getFirst().toString().trim(), record.getSecond().toString().trim());
-
-        assertNotNull(retrievedData);
-        assertEquals(retrievedData, record.getSecond().toString().trim());
-        System.out.printf(">>> k: %s, v: %s\n", record.getFirst().toString(), record.getSecond().toString());
-      }
-    }
-  }
-
-  private static void checkMRResultFiles(Configuration conf, Path outputDir,
-                                         String[][] data, String prefix) throws IOException {
-    FileSystem fs = FileSystem.get(conf);
-
-    // output exists?
-    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
-    assertEquals(1, fileStatuses.length); // only one
-    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
-    Map<String, String> fileToData = new HashMap<>();
-    for (String[] aData : data) {
-      System.out.printf("map.put: %s %s\n", prefix + Path.SEPARATOR + aData[0], aData[1]);
-      fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]);
-    }
-
-    // read a chunk to check content
-    try (SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(
-        fileStatuses[0].getPath(), true, conf)) {
-      while (iterator.hasNext()) {
-        Pair<Text, Text> record = iterator.next();
-        String retrievedData = fileToData.get(record.getFirst().toString().trim());
-
-        System.out.printf("MR> %s >> %s\n", record.getFirst().toString().trim(), record.getSecond().toString().trim());
-        assertNotNull(retrievedData);
-        assertEquals(retrievedData, record.getSecond().toString().trim());
-      }
-    }
-  }
-
-  private static void checkMRResultFilesRecursive(Configuration configuration, Path outputDir,
-                                                  String[][] data, String prefix) throws IOException {
-    FileSystem fs = FileSystem.get(configuration);
-
-    // output exists?
-    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
-    assertEquals(1, fileStatuses.length); // only one
-    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
-    Map<String, String> fileToData = new HashMap<>();
-    String currentPath = prefix;
-
-    for (String[] aData : data) {
-      currentPath += Path.SEPARATOR + aData[0];
-      fileToData.put(currentPath + Path.SEPARATOR + "file.txt", aData[1]);
-    }
-
-    // read a chunk to check content
-    try (SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<>(
-        fileStatuses[0].getPath(), true, configuration)){
-      while (iterator.hasNext()) {
-        Pair<Text, Text> record = iterator.next();
-        System.out.printf("MR-Recur > Trying to check: %s\n", record.getFirst().toString().trim());
-        String retrievedData = fileToData.get(record.getFirst().toString().trim());
-        assertNotNull(retrievedData);
-        assertEquals(retrievedData, record.getSecond().toString().trim());
-      }
-    }
-  }
-}
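
The deleted test above drives SequenceFilesFromDirectory in both sequential and MapReduce mode and verifies that each input file ends up in the output SequenceFile under a key of the form prefix + Path.SEPARATOR + relative path (here, "UID/..."). A minimal sketch of reading such output back with the plain Hadoop API, assuming a Hadoop 2.x-era client; the path below is illustrative ("chunk-0" is the sequential-mode file name, "part-m-00000" the MapReduce one):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class ReadChunkSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path chunk = new Path("outputDir/chunk-0");  // hypothetical output location
        try (SequenceFile.Reader reader =
                 new SequenceFile.Reader(conf, SequenceFile.Reader.file(chunk))) {
          Text key = new Text();    // e.g. "UID/inputDir/fileA"
          Text value = new Text();  // that file's contents
          while (reader.next(key, value)) {
            System.out.printf("%s -> %d chars%n", key, value.getLength());
          }
        }
      }
    }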

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/doc/MultipleFieldsDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/MultipleFieldsDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/MultipleFieldsDocument.java
deleted file mode 100644
index 7483b2d..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/MultipleFieldsDocument.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.TextField;
-
-/**
- * Used for testing lucene2seq
- */
-@Deprecated
-public class MultipleFieldsDocument extends SingleFieldDocument {
-
-  public static final String FIELD1 = "field1";
-  public static final String FIELD2 = "field2";
-
-  private String field1;
-  private String field2;
-
-  public MultipleFieldsDocument(String id, String field, String field1, String field2) {
-    super(id, field);
-    this.field1 = field1;
-    this.field2 = field2;
-  }
-
-  public String getField1() {
-    return field1;
-  }
-
-  public String getField2() {
-    return field2;
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = super.asLuceneDocument();
-
-    document.add(new TextField(FIELD1, this.field1, Field.Store.YES));
-    document.add(new TextField(FIELD2, this.field2, Field.Store.YES));
-
-    return document;
-  }
-}
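
MultipleFieldsDocument and the sibling classes below exist only to build Lucene Documents for the lucene2seq tests. A sketch of how such a document would be indexed; the directory and analyzer choices are illustrative, and the single-argument IndexWriterConfig constructor assumes a Lucene 5.x-style API (the era this deprecated test code targets):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class IndexDocsSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();  // in-memory index, fine for a sketch
        try (IndexWriter writer =
                 new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
          writer.addDocument(
              new MultipleFieldsDocument("1", "main text", "extra one", "extra two")
                  .asLuceneDocument());
        }
      }
    }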

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
deleted file mode 100644
index e06e8d6..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/NumericFieldDocument.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-
-/**
- * Document with a numeric field, used for testing lucene2seq.
- */
-@Deprecated
-public class NumericFieldDocument extends SingleFieldDocument {
-
-  public static final String NUMERIC_FIELD = "numeric";
-
-  private int numericField;
-
-  public NumericFieldDocument(String id, String field, int numericField) {
-    super(id, field);
-    this.numericField = numericField;
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = new Document();
-
-    document.add(new StringField(ID_FIELD, getId(), Field.Store.YES));
-    document.add(new TextField(FIELD, getField(), Field.Store.YES));
-    document.add(new IntField(NUMERIC_FIELD, numericField, Field.Store.YES));
-
-    return document;
-  }
-
-  public int getNumericField() {
-    return numericField;
-  }
-}
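
IntField, used above, belongs to Lucene's old numeric-field API: it was deprecated in Lucene 5 and removed in Lucene 6, which splits indexing and storage into separate field types. A migration sketch (not part of this test code; field name and value are placeholders):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.IntPoint;
    import org.apache.lucene.document.StoredField;

    public class NumericMigrationSketch {
      public static void main(String[] args) {
        Document document = new Document();
        document.add(new IntPoint("numeric", 42));     // indexed for point/range queries
        document.add(new StoredField("numeric", 42));  // stored so it can be read back
      }
    }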

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java
deleted file mode 100644
index 4636a51..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/SingleFieldDocument.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-
-/**
- * Used for testing lucene2seq
- */
-@Deprecated
-public class SingleFieldDocument implements TestDocument {
-
-  public static final String ID_FIELD = "idField";
-  public static final String FIELD = "field";
-
-  private String id;
-  private String field;
-
-  public SingleFieldDocument(String id, String field) {
-    this.id = id;
-    this.field = field;
-  }
-
-  @Override
-  public String getId() {
-    return id;
-  }
-
-  @Override
-  public String getField() {
-    return field;
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = new Document();
-
-    Field idField = new StringField(ID_FIELD, getId(), Field.Store.YES);
-    Field contentField = new TextField(FIELD, getField(), Field.Store.YES);
-
-    document.add(idField);
-    document.add(contentField);
-
-    return document;
-  }
-}
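
lucene2seq can only extract stored field values, which is what the Store.YES fields above guarantee. A sketch of reading those values back, essentially what the conversion does per document; it assumes an index like the one written in the earlier indexing sketch:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.Directory;

    public class ReadStoredSketch {
      static void dump(Directory dir) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
          for (int i = 0; i < reader.maxDoc(); i++) {
            Document stored = reader.document(i);
            String id = stored.get(SingleFieldDocument.ID_FIELD);  // "idField" value
            String text = stored.get(SingleFieldDocument.FIELD);   // "field" value
            System.out.printf("%s -> %s%n", id, text);
          }
        }
      }
    }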

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java
deleted file mode 100644
index 7243c71..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/TestDocument.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-
-@Deprecated
-public interface TestDocument {
-
-  String getId();
-
-  String getField();
-
-  Document asLuceneDocument();
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java b/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java
deleted file mode 100644
index 6eb43f6..0000000
--- a/integration/src/test/java/org/apache/mahout/text/doc/UnstoredFieldsDocument.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text.doc;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
-
-/**
- * Used for testing lucene2seq with a field that is indexed but not stored.
- */
-@Deprecated
-public class UnstoredFieldsDocument extends SingleFieldDocument {
-
-  public static final String UNSTORED_FIELD = "unstored";
-
-  public UnstoredFieldsDocument(String id, String field) {
-    super(id, field);
-  }
-
-  @Override
-  public Document asLuceneDocument() {
-    Document document = super.asLuceneDocument();
-
-    document.add(new StringField(UNSTORED_FIELD, "", Field.Store.NO));
-
-    return document;
-  }
-}
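
The point of UnstoredFieldsDocument is that Field.Store.NO makes a field searchable but not retrievable, so lucene2seq has nothing to extract for it. Continuing the reading sketch above (same assumptions about reader and dir):

    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      Document stored = reader.document(0);
      System.out.println(stored.get(UnstoredFieldsDocument.UNSTORED_FIELD)); // null: Store.NO
      System.out.println(stored.get(UnstoredFieldsDocument.FIELD));          // non-null: inherited Store.YES
    }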

http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java b/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java
deleted file mode 100644
index 65b308f..0000000
--- a/integration/src/test/java/org/apache/mahout/utils/Bump125Test.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.utils;
-
-import com.google.common.collect.Lists;
-
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-
-import java.util.Iterator;
-
-public class Bump125Test extends MahoutTestCase {
-  @Test
-  public void testIncrement() throws Exception {
-    Iterator<Integer> ref = Lists.newArrayList(1, 2, 3, 4, 5, 6, 7,
-            8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 50, 60,
-            70, 80, 100, 120, 140, 160, 180, 200, 250, 300, 350,
-            400, 500, 600, 700, 800, 1000, 1200, 1400, 1600, 1800,
-            2000, 2500, 3000, 3500, 4000, 5000, 6000, 7000)
-            .iterator();
-    Bump125 b = new Bump125();
-    for (int i = 0; i < 50; i++) {
-      long x = b.increment();
-      assertEquals(ref.next().longValue(), x);
-    }
-  }
-}
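
The reference list encodes a 1-2-5 progression: step sizes grow so that successive values stay on round numbers and thin out exponentially, which is useful for progress reporting. One way to reproduce exactly the expected sequence above (a sketch, not necessarily Mahout's actual Bump125 implementation):

    public class Bump125Sketch {
      private long value = 0;

      public long increment() {
        long step;
        if (value < 10) {
          step = 1;
        } else {
          // magnitude = largest power of ten <= value
          // (Java's Math.log10 is exact for powers of ten)
          long magnitude = (long) Math.pow(10, (long) Math.log10(value));
          long leading = value / magnitude;
          if (leading < 2) {
            step = magnitude / 5;   // e.g. 10 -> 12, 100 -> 120
          } else if (leading < 4) {
            step = magnitude / 2;   // e.g. 20 -> 25, 200 -> 250
          } else if (leading < 8) {
            step = magnitude;       // e.g. 40 -> 50, 400 -> 500
          } else {
            step = 2 * magnitude;   // e.g. 80 -> 100, 800 -> 1000
          }
        }
        value += step;
        return value;
      }
    }

Tracing this sketch by hand yields 1, 2, ..., 10, 12, ..., 20, 25, ..., 40, 50, ..., 80, 100, 120, ..., matching the 50 values asserted in the deleted test.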

