mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From robina...@apache.org
Subject svn commit: r909861 [4/4] - in /lucene/mahout/trunk/utils/src: main/java/org/apache/mahout/clustering/lda/ main/java/org/apache/mahout/text/ main/java/org/apache/mahout/utils/ main/java/org/apache/mahout/utils/clustering/ main/java/org/apache/mahout/ut...
Date Sat, 13 Feb 2010 17:56:01 GMT
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java
Sat Feb 13 17:55:56 2010
@@ -22,9 +22,8 @@
 
 import java.util.HashMap;
 
-import junit.framework.TestCase;
+import junit.framework.Assert;
 
-import org.apache.mahout.utils.nlp.collocations.llr.Gram;
 import org.junit.Test;
 
 public class GramTest {
@@ -34,36 +33,36 @@
     Gram one = new Gram("foo", 2, HEAD);
     Gram two = new Gram("foo", 3, HEAD);
     
-    TestCase.assertTrue(one.equals(two));
-    TestCase.assertTrue(two.equals(one));
+    Assert.assertTrue(one.equals(two));
+    Assert.assertTrue(two.equals(one));
     
     Gram three = new Gram("foo", 4, TAIL);
     Gram four = new Gram("foo");
     
-    TestCase.assertTrue(!three.equals(two));
-    TestCase.assertTrue(four.equals(one));
-    TestCase.assertTrue(one.equals(four));
+    Assert.assertTrue(!three.equals(two));
+    Assert.assertTrue(four.equals(one));
+    Assert.assertTrue(one.equals(four));
     
     Gram five = new Gram("foobar", 4, TAIL);
     
-    TestCase.assertTrue(!five.equals(four));
-    TestCase.assertTrue(!five.equals(three));
-    TestCase.assertTrue(!five.equals(two));
-    TestCase.assertTrue(!five.equals(one));
+    Assert.assertTrue(!five.equals(four));
+    Assert.assertTrue(!five.equals(three));
+    Assert.assertTrue(!five.equals(two));
+    Assert.assertTrue(!five.equals(one));
   }
   
   @Test
   public void testHashing() {
-    Gram[] input = 
+    Gram[] input =
     {
-        new Gram("foo", 2, HEAD),
-        new Gram("foo", 3, HEAD),
-        new Gram("foo", 4, TAIL),
-        new Gram("foo", 5, TAIL),
-        new Gram("bar", 6, HEAD),
-        new Gram("bar", 7, TAIL),
-        new Gram("bar", 8),
-        new Gram("bar")
+     new Gram("foo", 2, HEAD),
+     new Gram("foo", 3, HEAD),
+     new Gram("foo", 4, TAIL),
+     new Gram("foo", 5, TAIL),
+     new Gram("bar", 6, HEAD),
+     new Gram("bar", 7, TAIL),
+     new Gram("bar", 8),
+     new Gram("bar")
     };
     
     HashMap<Gram,Gram> map = new HashMap<Gram,Gram>();
@@ -78,32 +77,32 @@
     
     // frequencies of the items in the map.
     int[] freq = {
-        5,
-        3,
-        9,
-        5,
-        15,
-        7,
-        8,
-        1
+                  5,
+                  3,
+                  9,
+                  5,
+                  15,
+                  7,
+                  8,
+                  1
     };
     
     // true if the index should be the item in the map
     boolean[] memb = {
-        true,
-        false,
-        true,
-        false,
-        true,
-        true,
-        false,
-        false
+                      true,
+                      false,
+                      true,
+                      false,
+                      true,
+                      true,
+                      false,
+                      false
     };
     
     for (int i = 0; i < input.length; i++) {
       System.err.println(i);
-      TestCase.assertEquals(freq[i], input[i].getFrequency());
-      TestCase.assertEquals(memb[i], input[i] == map.get(input[i]));
+      Assert.assertEquals(freq[i], input[i].getFrequency());
+      Assert.assertEquals(memb[i], input[i] == map.get(input[i]));
     }
   }
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java
Sat Feb 13 17:55:56 2010
@@ -37,15 +37,15 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/** Test the LLRReducer 
+/** Test the LLRReducer
  *  FIXME: Add negative test cases.
  */
 @SuppressWarnings("deprecation")
 public class LLRReducerTest {
-
-  private static final Logger log = 
+  
+  private static final Logger log =
     LoggerFactory.getLogger(LLRReducerTest.class);
-
+  
   Reporter reporter;
   LLCallback ll;
   LLCallback cl;
@@ -54,11 +54,11 @@
   OutputCollector<Text,DoubleWritable> collector = new OutputCollector<Text,DoubleWritable>() {
     @Override
     public void collect(Text key, DoubleWritable value) throws IOException {
-      log.info(key.toString() + " " + value.toString());
+      LLRReducerTest.log.info(key.toString() + " " + value.toString());
     }
   };
-
-
+  
+  
   @Before
   public void setUp() {
     reporter  = EasyMock.createMock(Reporter.class);
@@ -66,7 +66,7 @@
     cl        = new LLCallback() {
       @Override
       public double logLikelihoodRatio(int k11, int k12, int k21, int k22) {
-        log.info("k11:" + k11 + " k12:" + k12 + " k21:" + k21 + " k22:" + k22);
+        LLRReducerTest.log.info("k11:" + k11 + " k12:" + k12 + " k21:" + k21 + " k22:" + k22);
         try {
           return LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22);
         }
@@ -75,48 +75,48 @@
           return -1;
         }
       }
-
+      
     };
   }
-
+  
   @Test
   public void testReduce() throws Exception {
     LLRReducer reducer = new LLRReducer(ll);
-
-    // test input, input[*][0] is the key, 
+    
+    // test input, input[*][0] is the key,
     // input[*][1..n] are the values passed in via
     // the iterator.
     
     
     Gram[][] input = {
-        {new Gram("the best",  1), new Gram("the",   2, HEAD), new Gram("best",  1, TAIL) },
-        {new Gram("best of",   1), new Gram("best",  1, HEAD), new Gram("of",    2, TAIL) },
-        {new Gram("of times",  2), new Gram("of",    2, HEAD), new Gram("times", 2, TAIL) },
-        {new Gram("times the", 1), new Gram("times", 1, HEAD), new Gram("the",   1, TAIL) },
-        {new Gram("the worst", 1), new Gram("the",   2, HEAD), new Gram("worst", 1, TAIL) },
-        {new Gram("worst of",  1), new Gram("worst", 1, HEAD), new Gram("of",    2, TAIL) }
+                      {new Gram("the best",  1), new Gram("the",   2, HEAD), new Gram("best",  1, TAIL) },
+                      {new Gram("best of",   1), new Gram("best",  1, HEAD), new Gram("of",    2, TAIL) },
+                      {new Gram("of times",  2), new Gram("of",    2, HEAD), new Gram("times", 2, TAIL) },
+                      {new Gram("times the", 1), new Gram("times", 1, HEAD), new Gram("the",   1, TAIL) },
+                      {new Gram("the worst", 1), new Gram("the",   2, HEAD), new Gram("worst", 1, TAIL) },
+                      {new Gram("worst of",  1), new Gram("worst", 1, HEAD), new Gram("of",    2, TAIL) }
     };
-
+    
     int[][] expectations = {
-        // A+B, A+!B, !A+B, !A+!B
-        {1, 1, 0, 5}, // the best
-        {1, 0, 1, 5}, // best of
-        {2, 0, 0, 5}, // of times
-        {1, 0, 0, 6}, // times the
-        {1, 1, 0, 5}, // the worst
-        {1, 0, 1, 5}  // worst of
+                            // A+B, A+!B, !A+B, !A+!B
+                            {1, 1, 0, 5}, // the best
+                            {1, 0, 1, 5}, // best of
+                            {2, 0, 0, 5}, // of times
+                            {1, 0, 0, 6}, // times the
+                            {1, 1, 0, 5}, // the worst
+                            {1, 0, 1, 5}  // worst of
     };
-
+    
     for (int[] ee: expectations) {
       EasyMock.expect(ll.logLikelihoodRatio(ee[0], ee[1], ee[2], ee[3])).andDelegateTo(cl);
     }
-
+    
     EasyMock.replay(ll);
-
+    
     JobConf config = new JobConf(CollocDriver.class);
     config.set(LLRReducer.NGRAM_TOTAL, "7");
     reducer.configure(config);
-
+    
     for (Gram[] ii: input) {
       List<Gram> vv = new LinkedList<Gram>();
       for (int i = 1; i < ii.length; i++) {
@@ -124,7 +124,7 @@
       }
       reducer.reduce(ii[0], vv.iterator(), collector, reporter);
     }
-
+    
     EasyMock.verify(ll);
   }
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java
Sat Feb 13 17:55:56 2010
@@ -42,41 +42,41 @@
  */
 @SuppressWarnings("deprecation")
 public class NGramCollectorTest {
-
+  
   OutputCollector<Gram,Gram> collector;
   Reporter reporter;
-
+  
   @Before
   @SuppressWarnings("unchecked")
   public void setUp() {
     collector = EasyMock.createMock(OutputCollector.class);
     reporter  = EasyMock.createMock(Reporter.class);
   }
-
+  
   @Test
   public void testCollectNgrams() throws Exception {
-
+    
     String input = "the best of times the worst of times";
-
-    String[][] values = 
+    
+    String[][] values =
       new String[][]{
-        {"h_the",   "the best"},
-        {"t_best",  "the best"},
-        {"h_best",  "best of"},
-        {"t_of",    "best of"},
-        {"h_of",    "of times"},
-        {"t_times", "of times"},
-        {"h_times", "times the"},
-        {"t_the",   "times the"},
-        {"h_the",   "the worst"},
-        {"t_worst", "the worst"},
-        {"h_worst", "worst of"},
-        {"t_of",    "worst of"},
-        {"h_of",    "of times"},
-        {"t_times", "of times"}
+                     {"h_the",   "the best"},
+                     {"t_best",  "the best"},
+                     {"h_best",  "best of"},
+                     {"t_of",    "best of"},
+                     {"h_of",    "of times"},
+                     {"t_times", "of times"},
+                     {"h_times", "times the"},
+                     {"t_the",   "times the"},
+                     {"h_the",   "the worst"},
+                     {"t_worst", "the worst"},
+                     {"h_worst", "worst of"},
+                     {"t_of",    "worst of"},
+                     {"h_of",    "of times"},
+                     {"t_times", "of times"}
     };
     // set up expectations for mocks. ngram max size = 2
-
+    
     // setup expectations
     for (String[] v: values) {
       Type p = v[0].startsWith("h") ? HEAD : TAIL;
@@ -84,24 +84,24 @@
       Gram ngram = new Gram(v[1]);
       collector.collect(subgram, ngram);
     }
-
+    
     reporter.incrCounter(NGRAM_TOTAL, 7);
     EasyMock.replay(reporter, collector);
     
     Reader r = new StringReader(input);
-
+    
     JobConf conf = new JobConf();
     conf.set(NGramCollector.MAX_SHINGLE_SIZE, "2");
     conf.set(NGramCollector.ANALYZER_CLASS, TestAnalyzer.class.getName());
-
+    
     NGramCollector c = new NGramCollector();
     c.configure(conf);
     
     c.collectNgrams(r, collector, reporter);
-
+    
     EasyMock.verify(reporter, collector);
   }
-
+  
   /** A lucene 2.9 standard analyzer with no stopwords. */
   public static class TestAnalyzer extends Analyzer {
     final Analyzer a;

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
Sat Feb 13 17:55:56 2010
@@ -17,40 +17,40 @@
 
 package org.apache.mahout.utils.vectors;
 
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Random;
+
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.math.function.UnaryFunction;
 import org.apache.mahout.math.Vector;
-
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-import java.util.Random;
+import org.apache.mahout.math.function.UnaryFunction;
 
 public class RandomVectorIterable implements Iterable<Vector>{
-
+  
   private int numItems = 100;
   public enum VectorType {DENSE, SPARSE}
-
+  
   private VectorType type = VectorType.SPARSE;
-
+  
   public RandomVectorIterable() {
   }
-
+  
   public RandomVectorIterable(int numItems) {
     this.numItems = numItems;
   }
-
+  
   public RandomVectorIterable(int numItems, VectorType type) {
     this.numItems = numItems;
     this.type = type;
   }
-
+  
   @Override
   public Iterator<Vector> iterator() {
     return new VectIterator();
   }
-
+  
   private class VectIterator implements Iterator<Vector>{
     private int count = 0;
     private final Random random = RandomUtils.getRandom();
@@ -58,7 +58,7 @@
     public boolean hasNext() {
       return count < numItems;
     }
-
+    
     @Override
     public Vector next() {
       if (!hasNext()) {
@@ -74,7 +74,7 @@
       count++;
       return result;
     }
-
+    
     @Override
     public void remove() {
       throw new UnsupportedOperationException();

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java
Sat Feb 13 17:55:56 2010
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.utils.vectors;
 
+import java.io.File;
+
+import junit.framework.Assert;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -27,13 +31,11 @@
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.utils.vectors.io.SequenceFileVectorWriter;
 
-import java.io.File;
-
 public class SequenceFileVectorIterableTest extends MahoutTestCase {
-
+  
   private File tmpLoc;
   private File tmpFile;
-
+  
   @Override
   public void setUp() throws Exception {
     super.setUp();
@@ -44,14 +46,14 @@
     tmpFile = File.createTempFile("sfvit", ".dat", tmpLoc);
     tmpFile.deleteOnExit();
   }
-
+  
   @Override
   public void tearDown() throws Exception {
     tmpFile.delete();
     tmpLoc.delete();
     super.tearDown();
   }
-
+  
   public void testIterable() throws Exception {
     Path path = new Path(tmpFile.getAbsolutePath());
     Configuration conf = new Configuration();
@@ -61,7 +63,7 @@
     RandomVectorIterable iter = new RandomVectorIterable(50);
     writer.write(iter);
     writer.close();
-
+    
     SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf);
     SequenceFileVectorIterable sfvi = new SequenceFileVectorIterable(seqReader);
     int count = 0;
@@ -70,6 +72,6 @@
       count++;
     }
     seqReader.close();
-    assertEquals(50, count);
+    Assert.assertEquals(50, count);
   }
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java
Sat Feb 13 17:55:56 2010
@@ -17,253 +17,255 @@
 
 package org.apache.mahout.utils.vectors.arff;
 
+import java.text.DateFormat;
+import java.util.Iterator;
+import java.util.Map;
+
+import junit.framework.Assert;
+
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 
-import java.text.DateFormat;
-import java.util.Iterator;
-import java.util.Map;
-
 public class ARFFVectorIterableTest extends MahoutTestCase {
-
+  
   public void testValues() throws Exception {
     StringBuilder builder = new StringBuilder();
     builder.append("%comments").append('\n').append("@RELATION Mahout").append('\n')
-            .append("@ATTRIBUTE foo numeric").append('\n')
-            .append("@ATTRIBUTE bar numeric").append('\n')
-            .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n')
-            .append("@ATTRIBUTE junk string").append('\n')
-            .append("@ATTRIBUTE theNominal {c,b,a}").append('\n')
-            .append("@DATA").append('\n')
-            .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n')
-            .append("2,3").append('\n')
-            .append("{0 5,1 23}").append('\n');
+    .append("@ATTRIBUTE foo numeric").append('\n')
+    .append("@ATTRIBUTE bar numeric").append('\n')
+    .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n')
+    .append("@ATTRIBUTE junk string").append('\n')
+    .append("@ATTRIBUTE theNominal {c,b,a}").append('\n')
+    .append("@DATA").append('\n')
+    .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n')
+    .append("2,3").append('\n')
+    .append("{0 5,1 23}").append('\n');
     ARFFModel model = new MapBackedARFFModel();
     ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(), model);
-    assertEquals("Mahout", iterable.getModel().getRelation());
+    Assert.assertEquals("Mahout", iterable.getModel().getRelation());
     Map<String, Integer> bindings = iterable.getModel().getLabelBindings();
-    assertNotNull(bindings);
-    assertEquals(5, bindings.size());
+    Assert.assertNotNull(bindings);
+    Assert.assertEquals(5, bindings.size());
     Iterator<Vector> iter = iterable.iterator();
-    assertTrue(iter.hasNext());
+    Assert.assertTrue(iter.hasNext());
     Vector next = iter.next();
-    assertNotNull(next);
-    assertTrue("Wrong instanceof", next instanceof DenseVector);
-    assertEquals(1.0, next.get(0));
-    assertEquals(2.0, next.get(1));
-    assertTrue(iter.hasNext());
+    Assert.assertNotNull(next);
+    Assert.assertTrue("Wrong instanceof", next instanceof DenseVector);
+    Assert.assertEquals(1.0, next.get(0));
+    Assert.assertEquals(2.0, next.get(1));
+    Assert.assertTrue(iter.hasNext());
     next = iter.next();
-    assertNotNull(next);
-    assertTrue("Wrong instanceof", next instanceof DenseVector);
-    assertEquals(2.0, next.get(0));
-    assertEquals(3.0, next.get(1));
-
-    assertTrue(iter.hasNext());
+    Assert.assertNotNull(next);
+    Assert.assertTrue("Wrong instanceof", next instanceof DenseVector);
+    Assert.assertEquals(2.0, next.get(0));
+    Assert.assertEquals(3.0, next.get(1));
+    
+    Assert.assertTrue(iter.hasNext());
     next = iter.next();
-    assertNotNull(next);
-    assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector);
-    assertEquals(5.0, next.get(0));
-    assertEquals(23.0, next.get(1));
-
-    assertFalse(iter.hasNext());
+    Assert.assertNotNull(next);
+    Assert.assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector);
+    Assert.assertEquals(5.0, next.get(0));
+    Assert.assertEquals(23.0, next.get(1));
+    
+    Assert.assertFalse(iter.hasNext());
   }
-
+  
   public void testDense() throws Exception {
     ARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_DENSE_ARFF, model);
+    ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_DENSE_ARFF, model);
     int count = 0;
     for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof DenseVector);
+      Assert.assertTrue("Vector is not dense", vector instanceof DenseVector);
       count++;
     }
-    assertEquals(10, count);
+    Assert.assertEquals(10, count);
   }
-
+  
   public void testSparse() throws Exception {
     ARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_SPARSE_ARFF, model);
+    ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_SPARSE_ARFF, model);
     int count = 0;
     for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+      Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
       count++;
     }
-    assertEquals(10, count);
+    Assert.assertEquals(10, count);
   }
-
+  
   public void testNonNumeric() throws Exception {
-
+    
     MapBackedARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+    ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model);
     int count = 0;
     for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+      Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
       count++;
     }
-    assertEquals(10, count);
+    Assert.assertEquals(10, count);
     Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
-    assertNotNull(nominalMap);
-    assertEquals(1, nominalMap.size());
+    Assert.assertNotNull(nominalMap);
+    Assert.assertEquals(1, nominalMap.size());
     Map<String, Integer> noms = nominalMap.get("bar");
-    assertNotNull("nominals for bar are null", noms);
-    assertEquals(2, noms.size());
+    Assert.assertNotNull("nominals for bar are null", noms);
+    Assert.assertEquals(2, noms.size());
     Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
-    assertNotNull("Type map null", integerARFFTypeMap);
-    assertEquals(5, integerARFFTypeMap.size());
+    Assert.assertNotNull("Type map null", integerARFFTypeMap);
+    Assert.assertEquals(5, integerARFFTypeMap.size());
     Map<String, Long> words = model.getWords();
-    assertNotNull("words null", words);
-    assertEquals(10, words.size());
+    Assert.assertNotNull("words null", words);
+    Assert.assertEquals(10, words.size());
     //System.out.println("Words: " + words);
     Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
-    assertNotNull("date format null", integerDateFormatMap);
-    assertEquals(1, integerDateFormatMap.size());
-
+    Assert.assertNotNull("date format null", integerDateFormatMap);
+    Assert.assertEquals(1, integerDateFormatMap.size());
+    
   }
-
+  
   public void testMultipleNoms() throws Exception {
     MapBackedARFFModel model = new MapBackedARFFModel();
-    ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model);
+    ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model);
     int count = 0;
     for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+      Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
       count++;
     }
-    assertEquals(10, count);
+    Assert.assertEquals(10, count);
     Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap();
-    assertNotNull(nominalMap);
-    assertEquals(1, nominalMap.size());
+    Assert.assertNotNull(nominalMap);
+    Assert.assertEquals(1, nominalMap.size());
     Map<String, Integer> noms = nominalMap.get("bar");
-    assertNotNull("nominals for bar are null", noms);
-    assertEquals(2, noms.size());
+    Assert.assertNotNull("nominals for bar are null", noms);
+    Assert.assertEquals(2, noms.size());
     Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap();
-    assertNotNull("Type map null", integerARFFTypeMap);
-    assertEquals(5, integerARFFTypeMap.size());
+    Assert.assertNotNull("Type map null", integerARFFTypeMap);
+    Assert.assertEquals(5, integerARFFTypeMap.size());
     Map<String, Long> words = model.getWords();
-    assertNotNull("words null", words);
-    assertEquals(10, words.size());
+    Assert.assertNotNull("words null", words);
+    Assert.assertEquals(10, words.size());
     //System.out.println("Words: " + words);
     Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap();
-    assertNotNull("date format null", integerDateFormatMap);
-    assertEquals(1, integerDateFormatMap.size());
+    Assert.assertNotNull("date format null", integerDateFormatMap);
+    Assert.assertEquals(1, integerDateFormatMap.size());
     model = new MapBackedARFFModel(model.getWords(), model.getWordCount(),
-            model.getNominalMap());
-    iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model);
+      model.getNominalMap());
+    iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF2, model);
     count = 0;
     for (Vector vector : iterable) {
-      assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
+      Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector);
       count++;
     }
     nominalMap = model.getNominalMap();
-    assertNotNull(nominalMap);
-    assertEquals(2, nominalMap.size());
+    Assert.assertNotNull(nominalMap);
+    Assert.assertEquals(2, nominalMap.size());
     noms = nominalMap.get("test");
-    assertNotNull("nominals for bar are null", noms);
-    assertEquals(2, noms.size());
+    Assert.assertNotNull("nominals for bar are null", noms);
+    Assert.assertEquals(2, noms.size());
   }
-
-
+  
+  
   private static final String SAMPLE_DENSE_ARFF = "   % Comments\n" +
-          "   % \n" +
-          "   % Comments go here" +
-          "   % \n" +
-          "   @RELATION Mahout\n" +
-          '\n' +
-          "   @ATTRIBUTE foo  NUMERIC\n" +
-          "   @ATTRIBUTE bar   NUMERIC\n" +
-          "   @ATTRIBUTE hockey  NUMERIC\n" +
-          "   @ATTRIBUTE football   NUMERIC\n" +
-          "  \n" +
-          '\n' +
-          '\n' +
-          "   @DATA\n" +
-          "   23.1,3.23,1.2,0.2\n" +
-          "   2.9,3.0,1.2,0.2\n" +
-          "   2.7,3.2,1.3,0.2\n" +
-          "   2.6,3.1,1.23,0.2\n" +
-          "   23.0,3.6,1.2,0.2\n" +
-          "   23.2,3.9,1.7,0.2\n" +
-          "   2.6,3.2,1.2,0.3\n" +
-          "   23.0,3.2,1.23,0.2\n" +
-          "   2.2,2.9,1.2,0.2\n" +
-          "   2.9,3.1,1.23,0.1\n";
-
-
+  "   % \n" +
+  "   % Comments go here" +
+  "   % \n" +
+  "   @RELATION Mahout\n" +
+  '\n' +
+  "   @ATTRIBUTE foo  NUMERIC\n" +
+  "   @ATTRIBUTE bar   NUMERIC\n" +
+  "   @ATTRIBUTE hockey  NUMERIC\n" +
+  "   @ATTRIBUTE football   NUMERIC\n" +
+  "  \n" +
+  '\n' +
+  '\n' +
+  "   @DATA\n" +
+  "   23.1,3.23,1.2,0.2\n" +
+  "   2.9,3.0,1.2,0.2\n" +
+  "   2.7,3.2,1.3,0.2\n" +
+  "   2.6,3.1,1.23,0.2\n" +
+  "   23.0,3.6,1.2,0.2\n" +
+  "   23.2,3.9,1.7,0.2\n" +
+  "   2.6,3.2,1.2,0.3\n" +
+  "   23.0,3.2,1.23,0.2\n" +
+  "   2.2,2.9,1.2,0.2\n" +
+  "   2.9,3.1,1.23,0.1\n";
+  
+  
   private static final String SAMPLE_SPARSE_ARFF = "   % Comments\n" +
-          "   % \n" +
-          "   % Comments go here" +
-          "   % \n" +
-          "   @RELATION Mahout\n" +
-          '\n' +
-          "   @ATTRIBUTE foo  NUMERIC\n" +
-          "   @ATTRIBUTE bar   NUMERIC\n" +
-          "   @ATTRIBUTE hockey  NUMERIC\n" +
-          "   @ATTRIBUTE football   NUMERIC\n" +
-          "   @ATTRIBUTE tennis   NUMERIC\n" +
-          "  \n" +
-          '\n' +
-          '\n' +
-          "   @DATA\n" +
-          "   {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
-          "   {0 2.9}\n" +
-          "   {0 2.7,2 3.2,3 1.3,4 0.2}\n" +
-          "   {1 2.6,2 3.1,3 1.23,4 0.2}\n" +
-          "   {1 23.0,2 3.6,3 1.2,4 0.2}\n" +
-          "   {0 23.2,1 3.9,3 1.7,4 0.2}\n" +
-          "   {0 2.6,1 3.2,2 1.2,4 0.3}\n" +
-          "   {1 23.0,2 3.2,3 1.23}\n" +
-          "   {1 2.2,2 2.94 0.2}\n" +
-          "   {1 2.9,2 3.1}\n";
-
+  "   % \n" +
+  "   % Comments go here" +
+  "   % \n" +
+  "   @RELATION Mahout\n" +
+  '\n' +
+  "   @ATTRIBUTE foo  NUMERIC\n" +
+  "   @ATTRIBUTE bar   NUMERIC\n" +
+  "   @ATTRIBUTE hockey  NUMERIC\n" +
+  "   @ATTRIBUTE football   NUMERIC\n" +
+  "   @ATTRIBUTE tennis   NUMERIC\n" +
+  "  \n" +
+  '\n' +
+  '\n' +
+  "   @DATA\n" +
+  "   {1 23.1,2 3.23,3 1.2,4 0.2}\n" +
+  "   {0 2.9}\n" +
+  "   {0 2.7,2 3.2,3 1.3,4 0.2}\n" +
+  "   {1 2.6,2 3.1,3 1.23,4 0.2}\n" +
+  "   {1 23.0,2 3.6,3 1.2,4 0.2}\n" +
+  "   {0 23.2,1 3.9,3 1.7,4 0.2}\n" +
+  "   {0 2.6,1 3.2,2 1.2,4 0.3}\n" +
+  "   {1 23.0,2 3.2,3 1.23}\n" +
+  "   {1 2.2,2 2.94 0.2}\n" +
+  "   {1 2.9,2 3.1}\n";
+  
   private static final String NON_NUMERIC_ARFF = "   % Comments\n" +
-          "   % \n" +
-          "   % Comments go here" +
-          "   % \n" +
-          "   @RELATION Mahout\n" +
-          '\n' +
-          "   @ATTRIBUTE junk  NUMERIC\n" +
-          "   @ATTRIBUTE foo  NUMERIC\n" +
-          "   @ATTRIBUTE bar   {c,d}\n" +
-          "   @ATTRIBUTE hockey  string\n" +
-          "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
-          "  \n" +
-          '\n' +
-          '\n' +
-          "   @DATA\n" +
-          "   {2 c,3 gretzky,4 1973-10-23}\n" +
-          "   {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
-          "   {2 c,3 bossy,4 1981-10-23}\n" +
-          "   {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" +
-          "   {3 esposito,4 1973-04-23}\n" +
-          "   {1 23.2,2 d,3 chelios,4 1999-2-23}\n" +
-          "   {3 richard,4 1973-10-12}\n" +
-          "   {3 howe,4 1983-06-23}\n" +
-          "   {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
-          "   {2 c,3 roy,4 1973-10-13}\n";
-
+  "   % \n" +
+  "   % Comments go here" +
+  "   % \n" +
+  "   @RELATION Mahout\n" +
+  '\n' +
+  "   @ATTRIBUTE junk  NUMERIC\n" +
+  "   @ATTRIBUTE foo  NUMERIC\n" +
+  "   @ATTRIBUTE bar   {c,d}\n" +
+  "   @ATTRIBUTE hockey  string\n" +
+  "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
+  "  \n" +
+  '\n' +
+  '\n' +
+  "   @DATA\n" +
+  "   {2 c,3 gretzky,4 1973-10-23}\n" +
+  "   {1 2.9,2 d,3 orr,4 1973-11-23}\n" +
+  "   {2 c,3 bossy,4 1981-10-23}\n" +
+  "   {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" +
+  "   {3 esposito,4 1973-04-23}\n" +
+  "   {1 23.2,2 d,3 chelios,4 1999-2-23}\n" +
+  "   {3 richard,4 1973-10-12}\n" +
+  "   {3 howe,4 1983-06-23}\n" +
+  "   {0 2.2,2 d,3 messier,4 2008-11-23}\n" +
+  "   {2 c,3 roy,4 1973-10-13}\n";
+  
   private static final String NON_NUMERIC_ARFF2 = "   % Comments\n" +
-          "   % \n" +
-          "   % Comments go here" +
-          "   % \n" +
-          "   @RELATION Mahout\n" +
-          '\n' +
-          "   @ATTRIBUTE junk  NUMERIC\n" +
-          "   @ATTRIBUTE foo  NUMERIC\n" +
-          "   @ATTRIBUTE test   {f,z}\n" +
-          "   @ATTRIBUTE hockey  string\n" +
-          "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
-          "  \n" +
-          '\n' +
-          '\n' +
-          "   @DATA\n" +
-          "   {2 f,3 gretzky,4 1973-10-23}\n" +
-          "   {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
-          "   {2 f,3 bossy,4 1981-10-23}\n" +
-          "   {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" +
-          "   {3 esposito,4 1973-04-23}\n" +
-          "   {1 23.2,2 z,3 chelios,4 1999-2-23}\n" +
-          "   {3 richard,4 1973-10-12}\n" +
-          "   {3 howe,4 1983-06-23}\n" +
-          "   {0 2.2,2 f,3 messier,4 2008-11-23}\n" +
-          "   {2 f,3 roy,4 1973-10-13}\n";
+  "   % \n" +
+  "   % Comments go here" +
+  "   % \n" +
+  "   @RELATION Mahout\n" +
+  '\n' +
+  "   @ATTRIBUTE junk  NUMERIC\n" +
+  "   @ATTRIBUTE foo  NUMERIC\n" +
+  "   @ATTRIBUTE test   {f,z}\n" +
+  "   @ATTRIBUTE hockey  string\n" +
+  "   @ATTRIBUTE football   date \"yyyy-MM-dd\"\n" +
+  "  \n" +
+  '\n' +
+  '\n' +
+  "   @DATA\n" +
+  "   {2 f,3 gretzky,4 1973-10-23}\n" +
+  "   {1 2.9,2 z,3 orr,4 1973-11-23}\n" +
+  "   {2 f,3 bossy,4 1981-10-23}\n" +
+  "   {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" +
+  "   {3 esposito,4 1973-04-23}\n" +
+  "   {1 23.2,2 z,3 chelios,4 1999-2-23}\n" +
+  "   {3 richard,4 1973-10-12}\n" +
+  "   {3 howe,4 1983-06-23}\n" +
+  "   {0 2.2,2 f,3 messier,4 2008-11-23}\n" +
+  "   {2 f,3 roy,4 1973-10-13}\n";
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java
Sat Feb 13 17:55:56 2010
@@ -17,27 +17,29 @@
 
 package org.apache.mahout.utils.vectors.io;
 
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileSystem;
+import java.io.File;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.Assert;
+
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.utils.vectors.RandomVectorIterable;
 
-import java.io.File;
-import java.io.StringWriter;
-import java.util.List;
-import java.util.ArrayList;
-
 public class VectorWriterTest extends MahoutTestCase {
-
+  
   private File tmpLoc;
   private File tmpFile;
-
+  
   @Override
   public void setUp() throws Exception {
     super.setUp();
@@ -48,14 +50,14 @@
     tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
     tmpFile.deleteOnExit();
   }
-
+  
   @Override
   public void tearDown() throws Exception {
     tmpFile.delete();
     tmpLoc.delete();
     super.tearDown();
   }
-
+  
   public void testSFVW() throws Exception {
     Path path = new Path(tmpFile.getAbsolutePath());
     Configuration conf = new Configuration();
@@ -65,7 +67,7 @@
     RandomVectorIterable iter = new RandomVectorIterable(50);
     writer.write(iter);
     writer.close();
-
+    
     SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf);
     LongWritable key = new LongWritable();
     VectorWritable value = new VectorWritable();
@@ -73,9 +75,9 @@
     while (seqReader.next(key, value)){
       count++;
     }
-    assertEquals(count + " does not equal: " + 50, 50, count);
+    Assert.assertEquals(count + " does not equal: " + 50, 50, count);
   }
-
+  
   public void test() throws Exception {
     StringWriter strWriter = new StringWriter();
     VectorWriter writer = new JWriterVectorWriter(strWriter);
@@ -85,8 +87,8 @@
     writer.write(vectors);
     writer.close();
     StringBuffer buffer = strWriter.getBuffer();
-    assertNotNull(buffer);
-    assertTrue(buffer.length() > 0);
-
+    Assert.assertNotNull(buffer);
+    Assert.assertTrue(buffer.length() > 0);
+    
   }
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
Sat Feb 13 17:55:56 2010
@@ -17,63 +17,65 @@
 
 package org.apache.mahout.utils.vectors.lucene;
 
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
+import junit.framework.Assert;
+
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Version;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.RandomAccessSparseVector;
-import org.apache.mahout.utils.vectors.Weight;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.utils.vectors.TFIDF;
 import org.apache.mahout.utils.vectors.TermInfo;
-import org.apache.mahout.math.Vector;
+import org.apache.mahout.utils.vectors.Weight;
 
 public class LuceneIterableTest extends MahoutTestCase {
   private RAMDirectory directory;
-
+  
   private static final String [] DOCS = {
-        "The quick red fox jumped over the lazy brown dogs.",
-        "Mary had a little lamb whose fleece was white as snow.",
-        "Moby Dick is a story of a whale and a man obsessed.",
-        "The robber wore a black fleece jacket and a baseball cap.",
-        "The English Springer Spaniel is the best of all dogs."
-    };
-
-
+                                         "The quick red fox jumped over the lazy brown dogs.",
+                                         "Mary had a little lamb whose fleece was white as snow.",
+                                         "Moby Dick is a story of a whale and a man obsessed.",
+                                         "The robber wore a black fleece jacket and a baseball cap.",
+                                         "The English Springer Spaniel is the best of all dogs."
+  };
+  
+  
   @Override
   protected void setUp() throws Exception {
     super.setUp();
     directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
-    for (int i = 0; i < DOCS.length; i++){
+    for (int i = 0; i < LuceneIterableTest.DOCS.length; i++){
       Document doc = new Document();
       Field id = new Field("id", "doc_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
       doc.add(id);
       //Store both position and offset information
-      Field text = new Field("content", DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
+      Field text = new Field("content", LuceneIterableTest.DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
       doc.add(text);
       writer.addDocument(doc);
     }
     writer.close();
   }
-
+  
   public void testIterable() throws Exception {
     IndexReader reader = IndexReader.open(directory, true);
     Weight weight = new TFIDF();
     TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100);
     VectorMapper mapper = new TFDFMapper(reader, weight, termInfo);
     LuceneIterable iterable = new LuceneIterable(reader, "id", "content", mapper);
-
+    
     //TODO: do something more meaningful here
     for (Vector vector : iterable) {
-      assertNotNull(vector);
-      assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector);
-      assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0);
+      Assert.assertNotNull(vector);
+      Assert.assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector);
+      Assert.assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0);
     }
   }
-
-
+  
+  
 }

Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java?rev=909861&r1=909860&r2=909861&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java
(original)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java
Sat Feb 13 17:55:56 2010
@@ -52,46 +52,48 @@
   public static final String DELIM = " .,?;:!\t\n\r";
   
   public static final String ERRORSET = "`1234567890"
-                                        + "-=~@#$%^&*()_+[]{}'\"/<>|\\";
+    + "-=~@#$%^&*()_+[]{}'\"/<>|\\";
   
   private static final Random random = RandomUtils.getRandom();
   
   private FileSystem fs;
   
   private static char getRandomDelimiter() {
-    return DELIM.charAt(random.nextInt(DELIM.length()));
+    return DictionaryVectorizerTest.DELIM.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.DELIM.length()));
   }
   
   public static String getRandomDocument() {
-    int length = (AVG_DOCUMENT_LENGTH >> 1)
-                 + random.nextInt(AVG_DOCUMENT_LENGTH);
-    StringBuilder sb = new StringBuilder(length * AVG_SENTENCE_LENGTH
-                                         * AVG_WORD_LENGTH);
+    int length = (DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH >> 1)
+    + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH);
+    StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_SENTENCE_LENGTH
+      * DictionaryVectorizerTest.AVG_WORD_LENGTH);
     for (int i = 0; i < length; i++) {
-      sb.append(getRandomSentence());
+      sb.append(DictionaryVectorizerTest.getRandomSentence());
     }
     return sb.toString();
   }
   
   public static String getRandomSentence() {
-    int length = (AVG_SENTENCE_LENGTH >> 1)
-                 + random.nextInt(AVG_SENTENCE_LENGTH);
-    StringBuilder sb = new StringBuilder(length * AVG_WORD_LENGTH);
+    int length = (DictionaryVectorizerTest.AVG_SENTENCE_LENGTH >> 1)
+    + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_SENTENCE_LENGTH);
+    StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_WORD_LENGTH);
     for (int i = 0; i < length; i++) {
-      sb.append(getRandomString()).append(' ');
+      sb.append(DictionaryVectorizerTest.getRandomString()).append(' ');
     }
-    sb.append(getRandomDelimiter());
+    sb.append(DictionaryVectorizerTest.getRandomDelimiter());
     return sb.toString();
   }
   
   public static String getRandomString() {
-    int length = (AVG_WORD_LENGTH >> 1) + random.nextInt(AVG_WORD_LENGTH);
+    int length = (DictionaryVectorizerTest.AVG_WORD_LENGTH >> 1) + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_WORD_LENGTH);
     StringBuilder sb = new StringBuilder(length);
     for (int i = 0; i < length; i++) {
-      sb.append(CHARSET.charAt(random.nextInt(CHARSET.length())));
+      sb.append(DictionaryVectorizerTest.CHARSET.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.CHARSET.length())));
+    }
+    if (DictionaryVectorizerTest.random.nextInt(10) == 0) {
+      sb.append(DictionaryVectorizerTest.ERRORSET.charAt(DictionaryVectorizerTest.random
+        .nextInt(DictionaryVectorizerTest.ERRORSET.length())));
     }
-    if (random.nextInt(10) == 0) sb.append(ERRORSET.charAt(random
-        .nextInt(ERRORSET.length())));
     return sb.toString();
   }
   
@@ -101,7 +103,7 @@
       if (f.isDirectory()) {
         String[] contents = f.list();
         for (String content : contents) {
-          rmr(f.toString() + File.separator + content);
+          DictionaryVectorizerTest.rmr(f.toString() + File.separator + content);
         }
       }
       f.delete();
@@ -111,31 +113,31 @@
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    rmr("output");
-    rmr("testdata");
+    DictionaryVectorizerTest.rmr("output");
+    DictionaryVectorizerTest.rmr("testdata");
     Configuration conf = new Configuration();
     fs = FileSystem.get(conf);
   }
   
   public void testCreateTermFrequencyVectors() throws IOException,
-                                              InterruptedException,
-                                              ClassNotFoundException,
-                                              URISyntaxException {
+  InterruptedException,
+  ClassNotFoundException,
+  URISyntaxException {
     Configuration conf = new Configuration();
     String pathString = "testdata/documents/docs.file";
     Path path = new Path(pathString);
     SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
-        Text.class, Text.class);
+      Text.class, Text.class);
     
-    for (int i = 0; i < NUM_DOCS; i++) {
+    for (int i = 0; i < DictionaryVectorizerTest.NUM_DOCS; i++) {
       writer.append(new Text("Document::ID::" + i), new Text(
-          getRandomDocument()));
+        DictionaryVectorizerTest.getRandomDocument()));
     }
     writer.close();
     Class<? extends Analyzer> analyzer = new StandardAnalyzer(
-        Version.LUCENE_CURRENT).getClass();
+      Version.LUCENE_CURRENT).getClass();
     DocumentProcessor.tokenizeDocuments(pathString, analyzer,
-      "output/tokenized-documents");
+    "output/tokenized-documents");
     DictionaryVectorizer.createTermFrequencyVectors("output/tokenized-documents",
       "output/wordcount", 2, 1, 0.0f, 1, 100, false);
     TFIDFConverter.processTfIdf("output/wordcount/vectors", "output/tfidf/", 100, 1, 99, 1.0f, false);



Mime
View raw message