mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1131417 - in /mahout/trunk: core/src/main/java/org/apache/mahout/classifier/ core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/ examples/src/main/java/org/apache/mahout/classifier/bayes/ examples/src/main/java/org/apache/mah...
Date Sat, 04 Jun 2011 15:47:07 GMT
Author: srowen
Date: Sat Jun  4 15:47:07 2011
New Revision: 1131417

URL: http://svn.apache.org/viewvc?rev=1131417&view=rev
Log:
Consistently select Lucene 3.1 behavior. Make BloomTokenFilter final since Lucene requires it

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sat Jun  4 15:47:07 2011
@@ -17,7 +17,11 @@
 
 package org.apache.mahout.classifier;
 
-import java.io.*;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
@@ -38,7 +42,6 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.Version;
-import org.apache.mahout.common.IOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -302,7 +305,7 @@ public final class BayesFileFormatter {
       if (cmdLine.hasOption(analyzerOpt)) {
         analyzer = Class.forName((String) cmdLine.getValue(analyzerOpt)).asSubclass(Analyzer.class).newInstance();
       } else {
-        analyzer = new StandardAnalyzer(Version.LUCENE_30);
+        analyzer = new StandardAnalyzer(Version.LUCENE_31);
       }
       Charset charset = Charsets.UTF_8;
       if (cmdLine.hasOption(charsetOpt)) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Sat Jun  4 15:47:07 2011
@@ -151,7 +151,7 @@ public final class Classify {
       analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
     }
     if (analyzer == null) {
-      analyzer = new StandardAnalyzer(Version.LUCENE_30);
+      analyzer = new StandardAnalyzer(Version.LUCENE_31);
     }
     
     log.info("Converting input document to proper format");

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapperTest.java Sat Jun  4 15:47:07 2011
@@ -86,7 +86,7 @@ public final class CollocMapperTest exte
     for (String[] v : values) {
       Type p = v[0].startsWith("h") ? Gram.Type.HEAD : Gram.Type.TAIL;
       int frequency = 1;
-      if (v[1].equals("of times")) {
+      if ("of times".equals(v[1])) {
         frequency = 2;
       }
       
@@ -150,8 +150,8 @@ public final class CollocMapperTest exte
       Type p = v[0].startsWith("h") ? Gram.Type.HEAD : Gram.Type.TAIL;
       p = v[0].startsWith("u") ? Gram.Type.UNIGRAM : p;
       int frequency = 1;
-      if (v[1].equals("of times") || v[1].equals("of") || v[1].equals("times")
-          || v[1].equals("the")) {
+      if ("of times".equals(v[1]) || "of".equals(v[1]) || "times".equals(v[1])
+          || "the".equals(v[1])) {
         frequency = 2;
       }
       
@@ -190,7 +190,7 @@ public final class CollocMapperTest exte
     private final Analyzer a;
     
     public TestAnalyzer() {
-      a = new StandardAnalyzer(Version.LUCENE_29, Collections.emptySet());
+      a = new StandardAnalyzer(Version.LUCENE_31, Collections.emptySet());
     }
     
     @Override

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/PrepareTwentyNewsgroups.java Sat Jun  4 15:47:07 2011
@@ -90,7 +90,7 @@ public final class PrepareTwentyNewsgrou
       try {
         analyzer = Class.forName(analyzerName).asSubclass(Analyzer.class).newInstance();
       } catch (InstantiationException e) {
-        analyzer = (Analyzer) Class.forName(analyzerName).getConstructor(Version.class).newInstance(Version.LUCENE_30);
+        analyzer = (Analyzer) Class.forName(analyzerName).getConstructor(Version.class).newInstance(Version.LUCENE_31);
       }
       // parent dir contains dir by category
       if (!parentDir.exists()) {

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Sat Jun  4 15:47:07 2011
@@ -122,7 +122,7 @@ public final class TrainNewsGroups {
     new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ENGLISH)
   };
 
-  private static final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
+  private static final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
   private static final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
   private static final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
   private static Multiset<String> overallCounts;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java Sat Jun  4 15:47:07 2011
@@ -35,7 +35,7 @@ import org.apache.lucene.analysis.tokena
 /**
  * Emits tokens based on bloom filter membership.
  */
-public class BloomTokenFilter extends TokenFilter {
+public final class BloomTokenFilter extends TokenFilter {
   
   private final Filter filter;
   private final TermAttribute termAtt;

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Sat Jun  4 15:47:07 2011
@@ -110,7 +110,7 @@ public final class TestClusterDumper ext
     sampleData = new ArrayList<VectorWritable>();
     RAMDirectory directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory,
-                                         new StandardAnalyzer(Version.LUCENE_30),
+                                         new StandardAnalyzer(Version.LUCENE_31),
                                          true,
                                          IndexWriter.MaxFieldLength.UNLIMITED);
     try {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java Sat Jun  4 15:47:07 2011
@@ -102,7 +102,7 @@ public final class TestL1ModelClustering
     sampleData = new ArrayList<VectorWritable>();
     RAMDirectory directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory,
-                                         new StandardAnalyzer(Version.LUCENE_30),
+                                         new StandardAnalyzer(Version.LUCENE_31),
                                          true,
                                          IndexWriter.MaxFieldLength.UNLIMITED);
     try {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java Sat Jun  4 15:47:07 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 import org.apache.mahout.utils.MahoutTestCase;
 import org.junit.Test;
 
@@ -76,7 +77,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testAnalyzer() throws IOException {
     StringReader reader = new StringReader(input);
-    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
+    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
     TokenStream ts = analyzer.tokenStream(null, reader);
     validateTokens(allTokens, ts);
   }
@@ -85,7 +86,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testNonKeepdAnalyzer() throws IOException {
     StringReader reader = new StringReader(input);
-    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
+    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
     TokenStream ts = analyzer.tokenStream(null, reader);
     BloomTokenFilter f = new BloomTokenFilter(getFilter(filterTokens), false /* toss matching tokens */, ts);
     validateTokens(expectedNonKeepTokens, f);
@@ -95,7 +96,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testKeepAnalyzer() throws IOException {
     StringReader reader = new StringReader(input);
-    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
+    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
     TokenStream ts = analyzer.tokenStream(null, reader);
     BloomTokenFilter f = new BloomTokenFilter(getFilter(filterTokens), true /* keep matching tokens */, ts);
     validateTokens(expectedKeepTokens, f);
@@ -105,7 +106,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testShingleFilteredAnalyzer() throws IOException {
     StringReader reader = new StringReader(input);
-    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
+    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
     TokenStream ts = analyzer.tokenStream(null, reader);
     ShingleFilter sf = new ShingleFilter(ts, 3);
     BloomTokenFilter f = new BloomTokenFilter(getFilter(shingleKeepTokens),  true, sf);

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=1131417&r1=1131416&r2=1131417&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Sat Jun  4 15:47:07 2011
@@ -170,7 +170,7 @@ public final class LuceneIterableTest ex
                                               int startingId) throws IOException {
     IndexWriter writer = new IndexWriter(
         directory,
-        new StandardAnalyzer(Version.LUCENE_30),
+        new StandardAnalyzer(Version.LUCENE_31),
         createNew,
         IndexWriter.MaxFieldLength.UNLIMITED);
     try {



Mime
View raw message