mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r1211874 - /mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
Date Thu, 08 Dec 2011 13:27:37 GMT
Author: gsingers
Date: Thu Dec  8 13:27:37 2011
New Revision: 1211874

URL: http://svn.apache.org/viewvc?rev=1211874&view=rev
Log:
MAHOUT-688: fix high df test

Modified:
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java?rev=1211874&r1=1211873&r2=1211874&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java Thu Dec  8 13:27:37 2011
@@ -41,7 +41,7 @@ import java.util.List;
 public class HighDFWordsPrunerTest extends MahoutTestCase {
   private static final int NUM_DOCS = 100;
 
-  private static final String[] HIGF_DF_WORDS = {"has", "which", "what", "srtyui"};
+  private static final String[] HIGH_DF_WORDS = {"has", "which", "what", "srtyui"};
 
   private Configuration conf;
   private Path inputPath;
@@ -66,8 +66,8 @@ public class HighDFWordsPrunerTest exten
 
   private String enhanceWithHighDFWords(String initialDoc) {
     StringBuilder sb = new StringBuilder(initialDoc);
-    for (int i = 0; i < HIGF_DF_WORDS.length; i++) {
-      sb.append(' ').append(HIGF_DF_WORDS[i]);
+    for (int i = 0; i < HIGH_DF_WORDS.length; i++) {
+      sb.append(' ').append(HIGH_DF_WORDS[i]);
     }
 
     return sb.toString();
@@ -95,6 +95,9 @@ public class HighDFWordsPrunerTest exten
     if (prune) {
       argList.add("-xs");
       argList.add("3"); // we prune all words that are outside 3*sigma
+    } else {
+      argList.add("--maxDFPercent");
+      argList.add("100"); // the default if, -xs is not specified is to use maxDFPercent, which defaults to 99%
     }
 
     argList.add("-seq");
@@ -114,9 +117,9 @@ public class HighDFWordsPrunerTest exten
   }
 
   private int[] getHighDFWordsDictionaryIndices(Path dictionaryPath) {
-    int[] highDFWordsDictionaryIndices = new int[HIGF_DF_WORDS.length];
+    int[] highDFWordsDictionaryIndices = new int[HIGH_DF_WORDS.length];
 
-    List<String> highDFWordsList = Arrays.asList(HIGF_DF_WORDS);
+    List<String> highDFWordsList = Arrays.asList(HIGH_DF_WORDS);
 
     for (Pair<Text, IntWritable> record : new SequenceFileDirIterable<Text, IntWritable>(dictionaryPath, PathType.GLOB,
             null, null, true, conf)) {
@@ -135,10 +138,10 @@ public class HighDFWordsPrunerTest exten
       Vector v = ((NamedVector) value.get()).getDelegate();
       for (int i = 0; i < highDFWordsDictionaryIndices.length; i++) {
         if (prune) {
-          assertTrue("Found vector for which word " + HIGF_DF_WORDS[i] + " is not pruned", v
+          assertTrue("Found vector for which word '" + HIGH_DF_WORDS[i] + "' is not pruned", v
                   .get(highDFWordsDictionaryIndices[i]) == 0.0);
         } else {
-          assertTrue("Found vector for which word " + HIGF_DF_WORDS[i] + " is pruned, and shouldn't have been", v
+          assertTrue("Found vector for which word '" + HIGH_DF_WORDS[i] + "' is pruned, and shouldn't have been", v
                   .get(highDFWordsDictionaryIndices[i]) != 0.0);
         }
       }



Mime
View raw message